Spaces:

Chaerin5
/

FoundHand

Runtime error

App Files Files Community

Chaerin5 committed on Apr 4, 2025

Commit

6a1d13f

1 Parent(s): 133d942

allow manual keypoints at edit hands; put fixed hand to original image

Browse files

Files changed (7) hide show

.gitignore +6 -0
README.md +1 -1
app.py +563 -254
brown_logo.png +3 -0
meta_logo.png +3 -0
sbatch/sbatch_demo.sh +38 -0
vqvae.py +4 -1

.gitignore ADDED Viewed

	@@ -0,0 +1,6 @@

+settings.json
+sbatch/err/
+sbatch/out/
+__pycache__/
+diffusion/__pycache__/
+*.pyc

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: ✋
 colorFrom: gray
 colorTo: purple
 sdk: gradio
-sdk_version: 4.44.1
 app_file: app.py
 pinned: false
 short_description: FoundHand

 colorFrom: gray
 colorTo: purple
 sdk: gradio
+sdk_version: 4.40.1
 app_file: app.py
 pinned: false
 short_description: FoundHand

app.py CHANGED Viewed

@@ -20,7 +20,10 @@ from copy import deepcopy
 from typing import Optional
 import requests
 from huggingface_hub import hf_hub_download
-import spaces
 MAX_N = 6
 FIX_MAX_N = 6
@@ -29,6 +32,12 @@ placeholder = cv2.cvtColor(cv2.imread("placeholder.png"), cv2.COLOR_BGR2RGB)
 NEW_MODEL = True
 MODEL_EPOCH = 6
 REF_POSE_MASK = True
 def set_seed(seed):
     seed = int(seed)
@@ -112,7 +121,7 @@ def visualize_hand(all_joints, img, side=["right", "left"], n_avail_joints=21):
     # Convert BytesIO object to numpy array
     buf.seek(0)
     img_pil = Image.open(buf)
-    img_pil = img_pil.resize((H, W))
     numpy_img = np.array(img_pil)
     return numpy_img
@@ -232,31 +241,9 @@ if NEW_MODEL:
     print(f"encoder after eval() max: {max([p.max() for p in autoencoder.encoder.parameters()])}")
     print(f"autoencoder encoder after eval() dtype: {next(autoencoder.encoder.parameters()).dtype}")
     assert len(missing_keys) == 0
-# else:
-#     opts = HandDiffOpts()
-#     model_path = './finetune_epoch=5-step=130000.ckpt'
-#     sd_path = './sd-v1-4.ckpt'
-#     print('Load diffusion model...')
-#     diffusion = create_diffusion(str(opts.test_sampling_steps))
-#     model = vit.DiT_XL_2(
-#         input_size=opts.latent_size[0],
-#         latent_dim=opts.latent_dim,
-#         in_channels=opts.latent_dim+opts.n_keypoints+opts.n_mask,
-#         learn_sigma=True,
-#     ).to(device)
-#     ckpt_state_dict = torch.load(model_path)['state_dict']
-#     dit_state_dict = {remove_prefix(k, 'diffusion_backbone.'): v for k, v in ckpt_state_dict.items() if k.startswith('diffusion_backbone')}
-#     vae_state_dict = {remove_prefix(k, 'autoencoder.'): v for k, v in ckpt_state_dict.items() if k.startswith('autoencoder')}
-#     missing_keys, extra_keys = model.load_state_dict(dit_state_dict, strict=False)
-#     model.eval()
-#     assert len(missing_keys) == 0 and len(extra_keys) == 0
-#     autoencoder = vqvae.create_model(3, 3, opts.latent_dim).eval().requires_grad_(False).to(device)
-#     missing_keys, extra_keys = autoencoder.load_state_dict(vae_state_dict, strict=False)
-#     autoencoder.eval()
-#     assert len(missing_keys) == 0 and len(extra_keys) == 0
-sam_path = hf_hub_download(repo_id="Chaerin5/FoundHand-weights", filename="sam_vit_h_4b8939.pth", token=token)
-sam_predictor = init_sam(ckpt_path=sam_path, device='cpu')
 print("Mediapipe hand detector and SAM ready...")
 mp_hands = mp.solutions.hands
@@ -266,17 +253,12 @@ hands = mp_hands.Hands(
     min_detection_confidence=0.1,
 )
-def prepare_ref_anno(ref):
     if ref is None:
         return (
-            None,
-            None,
-            None,
-            None,
-            None,
         )
-    missing_keys, extra_keys = autoencoder.load_state_dict(vae_state_dict, strict=False)
     img = ref["composite"][..., :3]
     img = cv2.resize(img, opts.image_size, interpolation=cv2.INTER_AREA)
     keypts = np.zeros((42, 2))
@@ -307,6 +289,7 @@ def get_ref_anno(img, keypts):
     if keypts is None:
         no_hands = cv2.resize(np.array(Image.open("no_hands.png"))[..., :3], (LENGTH, LENGTH))
         return None, no_hands, None
     if isinstance(keypts, list):
         if len(keypts[0]) == 0:
             keypts[0] = np.zeros((21, 2))
@@ -315,7 +298,6 @@ def get_ref_anno(img, keypts):
         else:
             gr.Info("Number of right hand keypoints should be either 0 or 21.")
             return None, None, None
         if len(keypts[1]) == 0:
             keypts[1] = np.zeros((21, 2))
         elif len(keypts[1]) == 21:
@@ -323,7 +305,6 @@ def get_ref_anno(img, keypts):
         else:
             gr.Info("Number of left hand keypoints should be either 0 or 21.")
             return None, None, None
         keypts = np.concatenate(keypts, axis=0)
     if REF_POSE_MASK:
         sam_predictor.set_image(img)
@@ -362,7 +343,7 @@ def get_ref_anno(img, keypts):
                 Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
             ]
         )
-        image = image_transform(img) # .to(device)
         kpts_valid = check_keypoints_validity(keypts, target_size)
         heatmaps = torch.tensor(
             keypoint_heatmap(
@@ -370,7 +351,7 @@ def get_ref_anno(img, keypts):
             )
             * kpts_valid[:, None, None],
             dtype=torch.float,
-            # device=device
         )[None, ...]
         mask = torch.tensor(
             cv2.resize(
@@ -379,7 +360,7 @@ def get_ref_anno(img, keypts):
                 interpolation=cv2.INTER_NEAREST,
             ),
             dtype=torch.float,
-            # device=device,
         ).unsqueeze(0)[None, ...]
         return image[None, ...], heatmaps, mask
@@ -388,7 +369,7 @@ def get_ref_anno(img, keypts):
         img,
         keypts,
         hand_mask,
-        device="cuda",
         target_size=opts.image_size,
         latent_size=opts.latent_size,
     )
@@ -409,62 +390,49 @@ def get_ref_anno(img, keypts):
     return img, ref_pose, ref_cond
-def get_target_anno(target):
-    if target is None:
-        return (
-            gr.State.update(value=None),
-            gr.Image.update(value=None),
-            gr.State.update(value=None),
-            gr.State.update(value=None),
-        )
-    pose_img = target["composite"][..., :3]
-    pose_img = cv2.resize(pose_img, opts.image_size, interpolation=cv2.INTER_AREA)
-    # detect keypoints
-    mp_pose = hands.process(pose_img)
-    target_keypts = np.zeros((42, 2))
-    detected = np.array([0, 0])
-    start_idx = 0
-    if mp_pose.multi_hand_landmarks:
-        # handedness is flipped assuming the input image is mirrored in MediaPipe
-        for hand_landmarks, handedness in zip(
-            mp_pose.multi_hand_landmarks, mp_pose.multi_handedness
-        ):
-            # actually right hand
-            if handedness.classification[0].label == "Left":
-                start_idx = 0
-                detected[0] = 1
-            # actually left hand
-            elif handedness.classification[0].label == "Right":
-                start_idx = 21
-                detected[1] = 1
-            for i, landmark in enumerate(hand_landmarks.landmark):
-                target_keypts[start_idx + i] = [
-                    landmark.x * opts.image_size[1],
-                    landmark.y * opts.image_size[0],
-                ]
-        target_pose = visualize_hand(target_keypts, pose_img)
-        kpts_valid = check_keypoints_validity(target_keypts, opts.image_size)
-        target_heatmaps = torch.tensor(
-            keypoint_heatmap(
-                scale_keypoint(target_keypts, opts.image_size, opts.latent_size),
-                opts.latent_size,
-                var=1.0,
-            )
-            * kpts_valid[:, None, None],
-            dtype=torch.float,
-            # device=device,
-        )[None, ...]
-        target_cond = torch.cat(
-            [target_heatmaps, torch.zeros_like(target_heatmaps)[:, :1]], 1
         )
-    else:
-        raise gr.Error("No hands detected in the target image.")
-    return pose_img, target_pose, target_cond, target_keypts
 def get_mask_inpaint(ref):
     inpaint_mask = np.array(ref["layers"][0])[..., -1]
     inpaint_mask = cv2.resize(
         inpaint_mask, opts.image_size, interpolation=cv2.INTER_AREA
@@ -473,12 +441,12 @@ def get_mask_inpaint(ref):
     return inpaint_mask
-def visualize_ref(crop, brush):
-    if crop is None or brush is None:
         return None
     inpainted = brush["layers"][0][..., -1]
-    img = crop["background"][..., :3]
-    img = cv2.resize(img, inpainted.shape[::-1], interpolation=cv2.INTER_AREA)
     mask = inpainted < 128
     # img = img.astype(np.int32)
     # img[mask, :] = img[mask, :] - 50
@@ -539,7 +507,39 @@ def reset_kps(img, keypoints, side: Literal["right", "left"]):
         keypoints[1] = []
     return img, keypoints
-@spaces.GPU(duration=60)
 def sample_diff(ref_cond, target_cond, target_keypts, num_gen, seed, cfg):
     set_seed(seed)
     z = torch.randn(
@@ -586,14 +586,17 @@ def sample_diff(ref_cond, target_cond, target_keypts, num_gen, seed, cfg):
     print(f"results[0].max(): {results[0].max()}")
     return results, results_pose
-@spaces.GPU(duration=120)
-def ready_sample(img_ori, inpaint_mask, keypts):
-    img = cv2.resize(img_ori[..., :3], opts.image_size, interpolation=cv2.INTER_AREA)
     sam_predictor.set_image(img)
     if len(keypts[0]) == 0:
         keypts[0] = np.zeros((21, 2))
     elif len(keypts[0]) == 21:
         keypts[0] = np.array(keypts[0], dtype=np.float32)
     else:
         gr.Info("Number of right hand keypoints should be either 0 or 21.")
         return None, None
@@ -602,12 +605,14 @@ def ready_sample(img_ori, inpaint_mask, keypts):
         keypts[1] = np.zeros((21, 2))
     elif len(keypts[1]) == 21:
         keypts[1] = np.array(keypts[1], dtype=np.float32)
     else:
         gr.Info("Number of left hand keypoints should be either 0 or 21.")
         return None, None
     keypts = np.concatenate(keypts, axis=0)
-    keypts = scale_keypoint(keypts, (LENGTH, LENGTH), opts.image_size)
     box_shift_ratio = 0.5
     box_size_factor = 1.2
@@ -643,7 +648,7 @@ def ready_sample(img_ori, inpaint_mask, keypts):
             inpaint_mask, dsize=opts.latent_size, interpolation=cv2.INTER_NEAREST
         ),
         dtype=torch.float,
-        # device=device,
     ).unsqueeze(0)[None, ...]
     def make_ref_cond(
@@ -661,7 +666,7 @@ def ready_sample(img_ori, inpaint_mask, keypts):
                 Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
             ]
         )
-        image = image_transform(img)
         kpts_valid = check_keypoints_validity(keypts, target_size)
         heatmaps = torch.tensor(
             keypoint_heatmap(
@@ -669,7 +674,7 @@ def ready_sample(img_ori, inpaint_mask, keypts):
             )
             * kpts_valid[:, None, None],
             dtype=torch.float,
-            # device=device,
         )[None, ...]
         mask = torch.tensor(
             cv2.resize(
@@ -678,7 +683,7 @@ def ready_sample(img_ori, inpaint_mask, keypts):
                 interpolation=cv2.INTER_NEAREST,
             ),
             dtype=torch.float,
-            # device=device,
         ).unsqueeze(0)[None, ...]
         return image[None, ...], heatmaps, mask
@@ -686,7 +691,7 @@ def ready_sample(img_ori, inpaint_mask, keypts):
         img,
         keypts,
         hand_mask * (1 - inpaint_mask),
-        device=device,
         target_size=opts.image_size,
         latent_size=opts.latent_size,
     )
@@ -726,13 +731,15 @@ def switch_mask_size(radio):
         out = (gr.update(visible=True), gr.update(visible=False))
     return out
-@spaces.GPU(duration=300)
 def sample_inpaint(
     ref_cond,
     target_cond,
     latent,
     inpaint_latent_mask,
     keypts,
     num_gen,
     seed,
     cfg,
@@ -778,39 +785,76 @@ def sample_inpaint(
     # visualize
     results = []
     results_pose = []
     for i in range(FIX_MAX_N):
         if i < num_gen:
-            results.append(sampled_images[i])
-            results_pose.append(visualize_hand(keypts, sampled_images[i]))
         else:
             results.append(placeholder)
             results_pose.append(placeholder)
-    return results, results_pose
 def flip_hand(
-    img, pose_img, cond: Optional[torch.Tensor], keypts: Optional[torch.Tensor] = None, pose_manual_img = None,
-    manual_kp_right=None, manual_kp_left=None
 ):
     if cond is None:  # clear clicked
-        return None, None, None, None
     img["composite"] = img["composite"][:, ::-1, :]
     img["background"] = img["background"][:, ::-1, :]
     img["layers"] = [layer[:, ::-1, :] for layer in img["layers"]]
     pose_img = pose_img[:, ::-1, :]
     cond = cond.flip(-1)
-    if keypts is not None:  # cond is target_cond
         if keypts[:21, :].sum() != 0:
             keypts[:21, 0] = opts.image_size[1] - keypts[:21, 0]
-            # keypts[:21, 1] = opts.image_size[0] - keypts[:21, 1]
         if keypts[21:, :].sum() != 0:
             keypts[21:, 0] = opts.image_size[1] - keypts[21:, 0]
-            # keypts[21:, 1] = opts.image_size[0] - keypts[21:, 1]
-    if pose_manual_img is not None:
-        pose_manual_img = pose_manual_img[:, ::-1, :]
-        manual_kp_right = manual_kp_right[:, ::-1, :]
-        manual_kp_left = manual_kp_left[:, ::-1, :]
-    return img, pose_img, cond, keypts, pose_manual_img, manual_kp_right, manual_kp_left
 def resize_to_full(img):
@@ -823,26 +867,30 @@ def resize_to_full(img):
 def clear_all():
     return (
         None,
         None,
         None,
         None,
         None,
-        False,
         None,
         None,
         False,
         None,
         None,
         None,
         None,
         None,
         None,
         None,
         1,
         42,
         3.0,
         gr.update(interactive=False),
-        []
     )
@@ -851,6 +899,9 @@ def fix_clear_all():
         None,
         None,
         None,
         None,
         None,
         None,
@@ -876,14 +927,14 @@ def fix_clear_all():
 def enable_component(image1, image2):
     if image1 is None or image2 is None:
         return gr.update(interactive=False)
-    if "background" in image1 and "layers" in image1 and "composite" in image1:
         if (
             image1["background"].sum() == 0
             and (sum([im.sum() for im in image1["layers"]]) == 0)
             and image1["composite"].sum() == 0
         ):
             return gr.update(interactive=False)
-    if "background" in image2 and "layers" in image2 and "composite" in image2:
         if (
             image2["background"].sum() == 0
             and (sum([im.sum() for im in image2["layers"]]) == 0)
@@ -940,6 +991,18 @@ def set_visible(checkbox, kpts, img_clean, img_pose_right, img_pose_left, done=N
 def set_unvisible():
     return (
         gr.update(visible=False),
         gr.update(visible=False),
         gr.update(visible=False),
@@ -954,6 +1017,18 @@ def set_unvisible():
         gr.update(visible=False)
     )
 def set_no_hands(decider, component):
     if decider is None:
         no_hands = cv2.resize(np.array(Image.open("no_hands.png"))[..., :3], (LENGTH, LENGTH))
@@ -975,19 +1050,6 @@ def unvisible_component(decider, component):
         update_component = gr.update(visible=True)
     return update_component
-# def make_change(decider, state):
-#     '''
-#     if decider is not None, change the state's value. True/False does not matter.
-#     '''
-#     if decider is not None:
-#         if state:
-#             state = False
-#         else:
-#             state = True
-#         return state
-#     else:
-#         return state
 LENGTH = 480
 example_ref_imgs = [
@@ -1083,7 +1145,7 @@ fix_example_imgs = [
     # ["bad_hands/4.jpg"],  # "bad_hands/4_mask.jpg"],
     ["bad_hands/5.jpg"],  # "bad_hands/5_mask.jpg"],
     ["bad_hands/6.jpg"],  # "bad_hands/6_mask.jpg"],
-    ["bad_hands/7.jpg"],  # "bad_hands/7_mask.jpg"],
     # ["bad_hands/8.jpg"],  # "bad_hands/8_mask.jpg"],
     # ["bad_hands/9.jpg"],  # "bad_hands/9_mask.jpg"],
     # ["bad_hands/10.jpg"],  # "bad_hands/10_mask.jpg"],
@@ -1137,20 +1199,32 @@ _CITE_ = r"""
 with gr.Blocks(css=custom_css, theme="soft") as demo:
     gr.Markdown(_HEADER_)
     with gr.Tab("Edit Hand Poses"):
         ref_img = gr.State(value=None)
         ref_im_raw = gr.State(value=None)
         ref_kp_raw = gr.State(value=0)
         ref_kp_got = gr.State(value=None)
-        dump = gr.State(value=None)
-        ref_cond = gr.State(value=None)
         ref_manual_cond = gr.State(value=None)
         ref_auto_cond = gr.State(value=None)
-        keypts = gr.State(value=None)
         target_img = gr.State(value=None)
-        target_cond = gr.State(value=None)
         target_keypts = gr.State(value=None)
-        dump = gr.State(value=None)
         with gr.Row():
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">1. Upload a hand image to edit 📥</p>"""
@@ -1270,6 +1344,8 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 ref_flip = gr.Checkbox(
                     value=False, label="Flip Handedness (Reference)", interactive=False
                 )
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">2. Upload a hand image for target hand pose 📥</p>"""
@@ -1294,20 +1370,105 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 target_finish_crop = gr.Button(
                     value="Finish Cropping", interactive=False
                 )
-                target_pose = gr.Image(
-                    type="numpy",
-                    label="Target Pose",
-                    show_label=True,
-                    height=LENGTH,
-                    width=LENGTH,
-                    interactive=False,
-                )
                 gr.Markdown(
                     """<p style="text-align: center;">&#9314; Optionally flip the hand</p>"""
                 )
                 target_flip = gr.Checkbox(
                     value=False, label="Flip Handedness (Target)", interactive=False
                 )
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">3. Press &quot;Run&quot; to get the edited results 🎯</p>"""
@@ -1371,10 +1532,18 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     interactive=True,
                 )
         ref.change(enable_component, [ref, ref], ref_finish_crop)
-        ref_finish_crop.click(prepare_ref_anno, [ref], [ref_im_raw, ref_kp_raw])
         ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_right)
         ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_left)
         ref_manual_checkbox.select(
             set_visible,
             [ref_manual_checkbox, ref_kp_got, ref_im_raw, ref_manual_kp_right, ref_manual_kp_left, ref_manual_done],
@@ -1412,38 +1581,94 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
         ref_manual_reset_left.click(
             reset_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
         )
         ref_manual_done.click(get_ref_anno, [ref_im_raw, ref_kp_got], [ref_img, ref_manual_pose, ref_manual_cond])
-        ref_manual_cond.change(lambda x: x, ref_manual_cond, ref_cond)
-        ref_use_manual.click(lambda x: x, ref_manual_cond, ref_cond)
-        # ref_use_manual.click(lambda x: gr.Info("Manual hand keypoints will be used for 'Reference'", duration=3))
-        ref_manual_done.click(visible_component, [ref_manual_pose, ref_manual_pose], ref_manual_pose)
-        ref_manual_done.click(visible_component, [ref_use_manual, ref_use_manual], ref_use_manual)
         ref_manual_pose.change(enable_component, [ref_manual_pose, ref_manual_pose], ref_manual_done)
-        ref_kp_raw.change(get_ref_anno, [ref_im_raw, ref_kp_raw], [ref_img, ref_pose, ref_auto_cond])
-        ref_auto_cond.change(lambda x: x, ref_auto_cond, ref_cond)
-        ref_use_auto.click(lambda x: x, ref_auto_cond, ref_cond)
-        # ref_use_auto.click(lambda x: gr.Info("Automatic hand keypoints will be used for 'Reference'", duration=3))
-        ref_pose.change(enable_component, [ref_kp_raw, ref_pose], ref_use_auto)
-        ref_pose.change(enable_component, [ref_img, ref_pose], ref_flip)
         ref_manual_pose.change(enable_component, [ref_img, ref_manual_pose], ref_flip)
         ref_flip.select(
-            flip_hand, [ref, ref_pose, ref_cond, gr.State(value=None), ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left], [ref, ref_pose, ref_cond, dump, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left]
         )
         target.change(enable_component, [target, target], target_finish_crop)
-        target_finish_crop.click(
-            get_target_anno,
-            [target],
-            [target_img, target_pose, target_cond, target_keypts],
-        )
         target_pose.change(enable_component, [target_img, target_pose], target_flip)
         target_flip.select(
             flip_hand,
-            [target, target_pose, target_cond, target_keypts],
-            [target, target_pose, target_cond, target_keypts],
         )
-        ref_pose.change(enable_component, [ref_pose, target_pose], run)
-        ref_manual_pose.change(enable_component, [ref_manual_pose, target_pose], run)
-        target_pose.change(enable_component, [ref_pose, target_pose], run)
         run.click(
             sample_diff,
             [ref_cond, target_cond, target_keypts, n_generation, seed, cfg],
@@ -1454,34 +1679,40 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
             [],
             [
                 ref,
                 ref_manual_kp_right,
                 ref_manual_kp_left,
                 ref_pose,
                 ref_manual_pose,
                 ref_flip,
                 target,
                 target_pose,
                 target_flip,
                 results,
                 results_pose,
-                ref_img,
-                ref_cond,
-                target_img,
-                target_cond,
-                target_keypts,
                 n_generation,
                 seed,
                 cfg,
                 ref_kp_raw,
-                ref_manual_checkbox
             ],
         )
         clear.click(
             set_unvisible,
             [],
             [
-                ref_manual_kp_r_info,
                 ref_manual_kp_l_info,
                 ref_manual_undo_left,
                 ref_manual_undo_right,
                 ref_manual_reset_left,
@@ -1490,14 +1721,25 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 ref_manual_done_info,
                 ref_manual_pose,
                 ref_use_manual,
-                ref_manual_kp_right,
-                ref_manual_kp_left
             ]
         )
     with gr.Tab("Fix Hands"):
         fix_inpaint_mask = gr.State(value=None)
         fix_original = gr.State(value=None)
         fix_img = gr.State(value=None)
         fix_kpts = gr.State(value=None)
         fix_kpts_np = gr.State(value=None)
@@ -1506,37 +1748,62 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
         fix_latent = gr.State(value=None)
         fix_inpaint_latent = gr.State(value=None)
         with gr.Row():
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">1. Upload a malformed hand image to fix 📥</p>"""
                 )
                 gr.Markdown(
-                    """<p style="text-align: center;">&#9312; Optionally crop the image around the hand</p>"""
                 )
-                fix_crop = gr.ImageEditor(
                     type="numpy",
                     sources=["upload", "webcam", "clipboard"],
-                    label="Image crop",
                     show_label=True,
                     height=LENGTH,
                     width=LENGTH,
-                    layers=False,
-                    crop_size="1:1",
-                    brush=False,
-                    image_mode="RGBA",
-                    container=False,
                 )
                 fix_example = gr.Examples(
                     fix_example_imgs,
                     inputs=[fix_crop],
                     examples_per_page=20,
                 )
                 gr.Markdown(
-                    """<p style="text-align: center;">&#9313; Brush area (e.g., wrong finger) that needs to be fixed. This will serve as an inpaint mask</p>"""
                 )
                 fix_ref = gr.ImageEditor(
                     type="numpy",
-                    label="Image brush",
                     sources=(),
                     show_label=True,
                     height=LENGTH,
@@ -1550,9 +1817,14 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     container=False,
                     interactive=False,
                 )
                 fix_finish_crop = gr.Button(
                     value="Finish Croping & Brushing", interactive=False
                 )
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">2. Click on hand to get target hand pose</p>"""
@@ -1565,13 +1837,14 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     show_label=False,
                     interactive=False,
                 )
-                gr.Markdown(
-                    """<p style="text-align: center;">&#9313; On the image, click 21 hand keypoints. This will serve as target hand poses. See the \"OpenPose keypoints convention\" for guidance.</p>"""
-                )
                 fix_kp_r_info = gr.Markdown(
-                    """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Select right only</p>""",
-                    visible=False,
                 )
                 fix_kp_right = gr.Image(
                     type="numpy",
                     label="Keypoint Selection (right hand)",
@@ -1590,7 +1863,7 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                         value="Reset", interactive=False, visible=False
                     )
                 fix_kp_l_info = gr.Markdown(
-                    """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Select left only</p>""",
                     visible=False
                 )
                 fix_kp_left = gr.Image(
@@ -1621,13 +1894,15 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     width=LENGTH // 2,
                     interactive=False,
                 )
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">3. Press &quot;Ready&quot; to start pre-processing</p>"""
                 )
                 fix_ready = gr.Button(value="Ready", interactive=False)
                 gr.Markdown(
-                    """<p style="text-align: center; font-weight: bold; ">Visualized (256, 256) Inpaint Mask</p>"""
                 )
                 fix_vis_mask32 = gr.Image(
                     type="numpy",
@@ -1646,9 +1921,11 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                     width=opts.image_size,
                     interactive=False,
                 )
-                gr.Markdown(
-                    """<p style="text-align: center;">[NOTE] Above should be inpaint mask that you brushed, NOT the segmentation mask of the entire hand. </p>"""
-                )
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">4. Press &quot;Run&quot; to get the fixed hand image 🎯</p>"""
@@ -1657,6 +1934,16 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 gr.Markdown(
                     """<p style="text-align: center;">⚠️  >3min and ~24GB per generation</p>"""
                 )
                 fix_result = gr.Gallery(
                     type="numpy",
                     label="Results",
@@ -1682,55 +1969,58 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 )
                 fix_clear = gr.ClearButton()
-        gr.Markdown(
-            """<p style="text-align: left; font-size: 25px;"><b>More options</b></p>"""
-        )
-        gr.Markdown(
-            "⚠️ Currently, Number of generation > 1 could lead to out-of-memory"
-        )
-        with gr.Row():
-            fix_n_generation = gr.Slider(
-                label="Number of generations",
-                value=1,
-                minimum=1,
-                maximum=FIX_MAX_N,
-                step=1,
-                randomize=False,
-                interactive=True,
-            )
-            fix_seed = gr.Slider(
-                label="Seed",
-                value=42,
-                minimum=0,
-                maximum=10000,
-                step=1,
-                randomize=False,
-                interactive=True,
-            )
-            fix_cfg = gr.Slider(
-                label="Classifier free guidance scale",
-                value=3.0,
-                minimum=0.0,
-                maximum=10.0,
-                step=0.1,
-                randomize=False,
-                interactive=True,
-            )
-            fix_quality = gr.Slider(
-                label="Quality",
-                value=10,
-                minimum=1,
-                maximum=10,
-                step=1,
-                randomize=False,
-                interactive=True,
             )
-        fix_crop.change(enable_component, [fix_crop, fix_crop], fix_ref)
-        fix_crop.change(resize_to_full, fix_crop, fix_ref)
-        fix_ref.change(enable_component, [fix_ref, fix_ref], fix_finish_crop)
-        fix_finish_crop.click(get_mask_inpaint, [fix_ref], [fix_inpaint_mask])
-        fix_finish_crop.click(lambda x: x["background"], [fix_crop], [fix_original])
-        fix_finish_crop.click(visualize_ref, [fix_crop, fix_ref], [fix_img])
         fix_img.change(lambda x: x, [fix_img], [fix_kp_right])
         fix_img.change(lambda x: x, [fix_img], [fix_kp_left])
         fix_inpaint_mask.change(
@@ -1775,7 +2065,7 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
             ],
         )
         fix_kp_right.select(
-            get_kps, [fix_img, fix_kpts, gr.State("right")], [fix_kp_right, fix_kpts]
         )
         fix_undo_right.click(
             undo_kps, [fix_img, fix_kpts, gr.State("right")], [fix_kp_right, fix_kpts]
@@ -1797,7 +2087,7 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
         )
         fix_ready.click(
             ready_sample,
-            [fix_original, fix_inpaint_mask, fix_kpts],
             [
                 fix_ref_cond,
                 fix_target_cond,
@@ -1816,23 +2106,28 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 fix_latent,
                 fix_inpaint_latent,
                 fix_kpts_np,
                 fix_n_generation,
                 fix_seed,
                 fix_cfg,
                 fix_quality,
             ],
-            [fix_result, fix_result_pose],
         )
         fix_clear.click(
             fix_clear_all,
             [],
             [
                 fix_crop,
                 fix_ref,
                 fix_kp_right,
                 fix_kp_left,
                 fix_result,
                 fix_result_pose,
                 fix_inpaint_mask,
                 fix_original,
                 fix_img,
@@ -1850,6 +2145,20 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 fix_quality,
             ],
         )
     gr.Markdown("<h1>Citation</h1>")
     gr.Markdown(

 from typing import Optional
 import requests
 from huggingface_hub import hf_hub_download
+try:
+    import spaces
+except Exception:
+    # Best-effort import: keep going without the package, but do not swallow
+    # KeyboardInterrupt/SystemExit the way a bare `except:` would.
+    pass
 MAX_N = 6
 FIX_MAX_N = 6
 NEW_MODEL = True
 MODEL_EPOCH = 6
 REF_POSE_MASK = True
+HF = False
+pre_device = "cpu" if HF else "cuda"
+# GPU decorators: wrap with spaces.GPU when HF is set, otherwise return the
+# function unchanged. The number in each name is the duration that is requested.
+spaces_60_fn = spaces.GPU(duration=60) if HF else (lambda f: f)
+spaces_120_fn = spaces.GPU(duration=120) if HF else (lambda f: f)
+spaces_300_fn = spaces.GPU(duration=300) if HF else (lambda f: f)
 def set_seed(seed):
     seed = int(seed)
     # Convert BytesIO object to numpy array
     buf.seek(0)
     img_pil = Image.open(buf)
+    img_pil = img_pil.resize((W, H))
     numpy_img = np.array(img_pil)
     return numpy_img
     print(f"encoder after eval() max: {max([p.max() for p in autoencoder.encoder.parameters()])}")
     print(f"autoencoder encoder after eval() dtype: {next(autoencoder.encoder.parameters()).dtype}")
     assert len(missing_keys) == 0
+sam_path = hf_hub_download(repo_id="Chaerin5/FoundHand-weights", filename="sam_vit_h_4b8939.pth", token=token)
+sam_predictor = init_sam(ckpt_path=sam_path, device=pre_device)
 print("Mediapipe hand detector and SAM ready...")
 mp_hands = mp.solutions.hands
     min_detection_confidence=0.1,
 )
+def prepare_anno(ref):
     if ref is None:
         return (
+            gr.Image.update(value=None),
+            gr.State.update(value=None),
         )
     img = ref["composite"][..., :3]
     img = cv2.resize(img, opts.image_size, interpolation=cv2.INTER_AREA)
     keypts = np.zeros((42, 2))
     if keypts is None:
         no_hands = cv2.resize(np.array(Image.open("no_hands.png"))[..., :3], (LENGTH, LENGTH))
         return None, no_hands, None
+    missing_keys, extra_keys = autoencoder.load_state_dict(vae_state_dict, strict=False)
     if isinstance(keypts, list):
         if len(keypts[0]) == 0:
             keypts[0] = np.zeros((21, 2))
         else:
             gr.Info("Number of right hand keypoints should be either 0 or 21.")
             return None, None, None
         if len(keypts[1]) == 0:
             keypts[1] = np.zeros((21, 2))
         elif len(keypts[1]) == 21:
         else:
             gr.Info("Number of left hand keypoints should be either 0 or 21.")
             return None, None, None
         keypts = np.concatenate(keypts, axis=0)
     if REF_POSE_MASK:
         sam_predictor.set_image(img)
                 Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
             ]
         )
+        image = image_transform(img).to(device)
         kpts_valid = check_keypoints_validity(keypts, target_size)
         heatmaps = torch.tensor(
             keypoint_heatmap(
             )
             * kpts_valid[:, None, None],
             dtype=torch.float,
+            device=device
         )[None, ...]
         mask = torch.tensor(
             cv2.resize(
                 interpolation=cv2.INTER_NEAREST,
             ),
             dtype=torch.float,
+            device=device,
         ).unsqueeze(0)[None, ...]
         return image[None, ...], heatmaps, mask
         img,
         keypts,
         hand_mask,
+        device=pre_device,
         target_size=opts.image_size,
         latent_size=opts.latent_size,
     )
     return img, ref_pose, ref_cond
+def get_target_anno(img, keypts):
+    """Build the target pose preview and conditioning tensor from keypoints.
+
+    Args:
+        img: Target image; passed to ``visualize_hand`` and returned unchanged.
+        keypts: ``None``, or a list ``[right, left]`` whose entries each hold
+            0 or 21 keypoints; any other value is used as the keypoint array
+            as-is.
+
+    Returns:
+        A 4-tuple ``(img, target_pose, target_cond, keypts)``. Every failure
+        path also returns four values, because the listeners that call this
+        function bind it to four outputs.
+    """
+    if keypts is None:
+        no_hands = cv2.resize(np.array(Image.open("no_hands.png"))[..., :3], (LENGTH, LENGTH))
+        return None, no_hands, None, None
+    if isinstance(keypts, list):
+        # Collect into a new list so the caller's list (a gr.State value at the
+        # call sites) keeps its original per-hand entries.
+        hands = []
+        for side, pts in (("right", keypts[0]), ("left", keypts[1])):
+            if len(pts) == 0:
+                hands.append(np.zeros((21, 2)))
+            elif len(pts) == 21:
+                hands.append(np.array(pts, dtype=np.float32))
+            else:
+                gr.Info(f"Number of {side} hand keypoints should be either 0 or 21.")
+                return None, None, None, None
+        keypts = np.concatenate(hands, axis=0)
+    target_pose = visualize_hand(keypts, img)
+    kpts_valid = check_keypoints_validity(keypts, opts.image_size)
+    target_heatmaps = torch.tensor(
+        keypoint_heatmap(
+            scale_keypoint(keypts, opts.image_size, opts.latent_size),
+            opts.latent_size,
+            var=1.0,
         )
+        * kpts_valid[:, None, None],
+        dtype=torch.float,
+        device=pre_device,
+    )[None, ...]
+    target_cond = torch.cat(
+        [target_heatmaps, torch.zeros_like(target_heatmaps)[:, :1]], 1
+    )
+    return img, target_pose, target_cond, keypts
 def get_mask_inpaint(ref):
+    # inpaint_mask = np.zeros_like(img_original[:, :, 0])
+    # cropped_mask = np.array(ref["layers"][0])[..., -1]
+    # inpaint_mask[crop_coord[0][1]:crop_coord[1][1], crop_coord[0][0]:crop_coord[1][0]] = cropped_mask
     inpaint_mask = np.array(ref["layers"][0])[..., -1]
     inpaint_mask = cv2.resize(
         inpaint_mask, opts.image_size, interpolation=cv2.INTER_AREA
     return inpaint_mask
+def visualize_ref(brush): # crop,
+    if brush is None: # crop is None or
         return None
     inpainted = brush["layers"][0][..., -1]
+    img = brush["background"][..., :3]
+    # img = cv2.resize(img, inpainted.shape[::-1], interpolation=cv2.INTER_AREA)
     mask = inpainted < 128
     # img = img.astype(np.int32)
     # img[mask, :] = img[mask, :] - 50
         keypoints[1] = []
     return img, keypoints
+def stay_crop(img, crop_coord):
+    """Reset the crop so that it spans the whole image.
+
+    Args:
+        img: Image array, or ``None`` when nothing is loaded.
+        crop_coord: Previous crop coordinates; not read.
+
+    Returns:
+        ``([[0, 0], [width, height]], copy_of_img)``, or ``(None, None)`` when
+        ``img`` is ``None``.
+    """
+    if img is None:
+        return None, None
+    height, width = img.shape[0], img.shape[1]
+    full_extent = [[0, 0], [width, height]]
+    return full_extent, img.copy()
+def process_crop(img, crop_coord, evt: gr.SelectData):
+    """Advance the two-click crop selection by one click.
+
+    Args:
+        img: Image array the user clicked on.
+        crop_coord: List of the corner points collected so far (length 1 or 2).
+        evt: Select event; ``evt.index`` is the clicked point.
+
+    Returns:
+        ``(crop_coord, cropped)``: the updated corner list and the image to
+        show (a full copy until both corners are known, then the cropped region).
+
+    Raises:
+        gr.Error: If ``crop_coord`` holds neither one nor two points.
+    """
+    if len(crop_coord) == 2:
+        # Two corners are already stored, so this click starts a new selection.
+        return [list(evt.index)], img.copy()
+    if len(crop_coord) == 1:
+        new_coord = list(evt.index)
+        x1, y1 = crop_coord[0]
+        x2, y2 = new_coord
+        if x2 <= x1 or y2 <= y1:
+            gr.Warning("The second click must be below and to the right of the first click. Try the second click again.", duration=3)
+            return crop_coord, img.copy()
+        return [crop_coord[0], new_coord], img[y1:y2, x1:x2].copy()
+    # gr.Error only reaches the user when raised; merely constructing it left
+    # `cropped` unbound for the return that used to follow.
+    raise gr.Error("Something is wrong", duration=3)
+def disable_crop(crop_coord):
+    """Return an update that makes the component non-interactive once two crop corners are stored."""
+    crop_finished = len(crop_coord) == 2
+    return gr.update(interactive=not crop_finished)
+@spaces_60_fn
 def sample_diff(ref_cond, target_cond, target_keypts, num_gen, seed, cfg):
     set_seed(seed)
     z = torch.randn(
     print(f"results[0].max(): {results[0].max()}")
     return results, results_pose
+@spaces_120_fn
+def ready_sample(img_cropped, inpaint_mask, keypts):
+    # img = cv2.resize(img_ori[..., :3], opts.image_size, interpolation=cv2.INTER_AREA)
+    img = cv2.resize(img_cropped["background"][..., :3], opts.image_size, interpolation=cv2.INTER_AREA)
     sam_predictor.set_image(img)
     if len(keypts[0]) == 0:
         keypts[0] = np.zeros((21, 2))
     elif len(keypts[0]) == 21:
         keypts[0] = np.array(keypts[0], dtype=np.float32)
+        # keypts[0][:, 0] = keypts[0][:, 0] + crop_coord[0][0]
+        # keypts[0][:, 1] = keypts[0][:, 1] + crop_coord[0][1]
     else:
         gr.Info("Number of right hand keypoints should be either 0 or 21.")
         return None, None
         keypts[1] = np.zeros((21, 2))
     elif len(keypts[1]) == 21:
         keypts[1] = np.array(keypts[1], dtype=np.float32)
+        # keypts[1][:, 0] = keypts[1][:, 0] + crop_coord[0][0]
+        # keypts[1][:, 1] = keypts[1][:, 1] + crop_coord[0][1]
     else:
         gr.Info("Number of left hand keypoints should be either 0 or 21.")
         return None, None
     keypts = np.concatenate(keypts, axis=0)
+    keypts = scale_keypoint(keypts, (img_cropped["background"].shape[1], img_cropped["background"].shape[0]), opts.image_size)
     box_shift_ratio = 0.5
     box_size_factor = 1.2
             inpaint_mask, dsize=opts.latent_size, interpolation=cv2.INTER_NEAREST
         ),
         dtype=torch.float,
+        device=pre_device,
     ).unsqueeze(0)[None, ...]
     def make_ref_cond(
                 Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
             ]
         )
+        image = image_transform(img).to(device)
         kpts_valid = check_keypoints_validity(keypts, target_size)
         heatmaps = torch.tensor(
             keypoint_heatmap(
             )
             * kpts_valid[:, None, None],
             dtype=torch.float,
+            device=device,
         )[None, ...]
         mask = torch.tensor(
             cv2.resize(
                 interpolation=cv2.INTER_NEAREST,
             ),
             dtype=torch.float,
+            device=device,
         ).unsqueeze(0)[None, ...]
         return image[None, ...], heatmaps, mask
         img,
         keypts,
         hand_mask * (1 - inpaint_mask),
+        device=pre_device,
         target_size=opts.image_size,
         latent_size=opts.latent_size,
     )
         out = (gr.update(visible=True), gr.update(visible=False))
     return out
+@spaces_300_fn
 def sample_inpaint(
     ref_cond,
     target_cond,
     latent,
     inpaint_latent_mask,
     keypts,
+    img_original,
+    crop_coord,
     num_gen,
     seed,
     cfg,
     # visualize
     results = []
     results_pose = []
+    results_original = []
     for i in range(FIX_MAX_N):
         if i < num_gen:
+            res =sampled_images[i]
+            results.append(res)
+            results_pose.append(visualize_hand(keypts, res))
+            res = cv2.resize(res, (crop_coord[1][0]-crop_coord[0][0], crop_coord[1][1]-crop_coord[0][1]))
+            res_original = img_original.copy()
+            res_original[crop_coord[0][1]:crop_coord[1][1], crop_coord[0][0]:crop_coord[1][0], :] = res
+            results_original.append(res_original)
         else:
             results.append(placeholder)
             results_pose.append(placeholder)
+            results_original.append(placeholder)
+    return results, results_pose, results_original
 def flip_hand(
+    img, img_raw, pose_img, pose_manual_img,
+    manual_kp_right, manual_kp_left,
+    cond, auto_cond, manual_cond,
+    keypts=None, auto_keypts=None, manual_keypts=None
 ):
+    """Mirror the editor image, pose previews, conditions and keypoints.
+
+    Image arrays are reversed along axis 1, tensors along their last axis, and
+    the x column of each keypoint array is replaced by
+    ``opts.image_size[1] - x`` (a 21-row half whose entries sum to zero is left
+    as is). ``img`` and the keypoint arrays are modified in place.
+
+    Returns:
+        The twelve arguments, in signature order, after flipping. When
+        ``cond`` is ``None`` they are returned untouched, so the caller still
+        receives one value per bound output.
+    """
+
+    def flip_array(arr):
+        # Reverse axis 1 of an image array; None passes through.
+        return None if arr is None else arr[:, ::-1, :]
+
+    def flip_tensor(tensor):
+        # Reverse the last axis of a tensor; None passes through.
+        return None if tensor is None else tensor.flip(-1)
+
+    def flip_keypoints(kpts):
+        # Mirror x for each 21-row half whose sum is non-zero (in place).
+        if kpts is not None:
+            if kpts[:21, :].sum() != 0:
+                kpts[:21, 0] = opts.image_size[1] - kpts[:21, 0]
+            if kpts[21:, :].sum() != 0:
+                kpts[21:, 0] = opts.image_size[1] - kpts[21:, 0]
+        return kpts
+
     if cond is None:  # clear clicked
+        # Nothing to flip: hand every input back instead of a bare None, which
+        # cannot be spread over the listener's outputs.
+        return (
+            img, img_raw, pose_img, pose_manual_img, manual_kp_right, manual_kp_left,
+            cond, auto_cond, manual_cond, keypts, auto_keypts, manual_keypts,
+        )
     img["composite"] = img["composite"][:, ::-1, :]
     img["background"] = img["background"][:, ::-1, :]
     img["layers"] = [layer[:, ::-1, :] for layer in img["layers"]]
+    img_raw = flip_array(img_raw)
+    pose_img = flip_array(pose_img)
+    pose_manual_img = flip_array(pose_manual_img)
+    manual_kp_right = flip_array(manual_kp_right)
+    manual_kp_left = flip_array(manual_kp_left)
     cond = cond.flip(-1)
+    auto_cond = flip_tensor(auto_cond)
+    manual_cond = flip_tensor(manual_cond)
+    keypts = flip_keypoints(keypts)
+    auto_keypts = flip_keypoints(auto_keypts)
+    manual_keypts = flip_keypoints(manual_keypts)
+    return img, img_raw, pose_img, pose_manual_img, manual_kp_right, manual_kp_left, cond, auto_cond, manual_cond, keypts, auto_keypts, manual_keypts
 def resize_to_full(img):
 def clear_all():
     return (
         None,
+        [],
         None,
         None,
         None,
         None,
         None,
         None,
         False,
         None,
         None,
+        [],
+        None,
+        None,
+        None,
         None,
         None,
         None,
+        False,
         None,
         None,
         1,
         42,
         3.0,
         gr.update(interactive=False),
     )
         None,
         None,
         None,
+        [],
+        None,
+        None,
         None,
         None,
         None,
 def enable_component(image1, image2):
     if image1 is None or image2 is None:
         return gr.update(interactive=False)
+    if isinstance(image1, dict) and "background" in image1 and "layers" in image1 and "composite" in image1:
         if (
             image1["background"].sum() == 0
             and (sum([im.sum() for im in image1["layers"]]) == 0)
             and image1["composite"].sum() == 0
         ):
             return gr.update(interactive=False)
+    if isinstance(image1, dict) and "background" in image2 and "layers" in image2 and "composite" in image2:
         if (
             image2["background"].sum() == 0
             and (sum([im.sum() for im in image2["layers"]]) == 0)
 def set_unvisible():
     return (
+        gr.update(visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
+        gr.update(visible=False),
         gr.update(visible=False),
         gr.update(visible=False),
         gr.update(visible=False),
         gr.update(visible=False)
     )
+def fix_set_unvisible():
+    """Return a tuple of eight ``gr.update(visible=False)`` values."""
+    return tuple(gr.update(visible=False) for _ in range(8))
 def set_no_hands(decider, component):
     if decider is None:
         no_hands = cv2.resize(np.array(Image.open("no_hands.png"))[..., :3], (LENGTH, LENGTH))
         update_component = gr.update(visible=True)
     return update_component
 LENGTH = 480
 example_ref_imgs = [
     # ["bad_hands/4.jpg"],  # "bad_hands/4_mask.jpg"],
     ["bad_hands/5.jpg"],  # "bad_hands/5_mask.jpg"],
     ["bad_hands/6.jpg"],  # "bad_hands/6_mask.jpg"],
+    # ["bad_hands/7.jpg"],  # "bad_hands/7_mask.jpg"],
     # ["bad_hands/8.jpg"],  # "bad_hands/8_mask.jpg"],
     # ["bad_hands/9.jpg"],  # "bad_hands/9_mask.jpg"],
     # ["bad_hands/10.jpg"],  # "bad_hands/10_mask.jpg"],
 with gr.Blocks(css=custom_css, theme="soft") as demo:
     gr.Markdown(_HEADER_)
     with gr.Tab("Edit Hand Poses"):
+        dump = gr.State(value=None)
+        # ref states
         ref_img = gr.State(value=None)
         ref_im_raw = gr.State(value=None)
         ref_kp_raw = gr.State(value=0)
         ref_kp_got = gr.State(value=None)
         ref_manual_cond = gr.State(value=None)
         ref_auto_cond = gr.State(value=None)
+        ref_cond = gr.State(value=None)
+        # target states
         target_img = gr.State(value=None)
+        target_im_raw = gr.State(value=None)
+        target_kp_raw = gr.State(value=0)
+        target_kp_got = gr.State(value=None)
+        target_manual_keypts = gr.State(value=None)
+        target_auto_keypts = gr.State(value=None)
         target_keypts = gr.State(value=None)
+        target_manual_cond = gr.State(value=None)
+        target_auto_cond = gr.State(value=None)
+        target_cond = gr.State(value=None)
+        # main tab
         with gr.Row():
+            # ref column
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">1. Upload a hand image to edit 📥</p>"""
                 ref_flip = gr.Checkbox(
                     value=False, label="Flip Handedness (Reference)", interactive=False
                 )
+            # target column
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">2. Upload a hand image for target hand pose 📥</p>"""
                 target_finish_crop = gr.Button(
                     value="Finish Cropping", interactive=False
                 )
+                with gr.Tab("Automatic hand keypoints"):
+                    target_pose = gr.Image(
+                        type="numpy",
+                        label="Target Pose",
+                        show_label=True,
+                        height=LENGTH,
+                        width=LENGTH,
+                        interactive=False,
+                    )
+                    target_use_auto = gr.Button(value="Click here to use automatic, not manual", interactive=False, visible=True)
+                with gr.Tab("Manual hand keypoints"):
+                    target_manual_checkbox_info = gr.Markdown(
+                        """<p style="text-align: center;"><b>Step 1.</b> Tell us if this is right, left, or both hands.</p>""",
+                        visible=True,
+                    )
+                    target_manual_checkbox = gr.CheckboxGroup(
+                        ["Right hand", "Left hand"],
+                        show_label=False,
+                        visible=True,
+                        interactive=True,
+                    )
+                    target_manual_kp_r_info = gr.Markdown(
+                        """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>right</b> hand. See \"OpenPose Keypoint Convention\" for guidance.</p>""",
+                        visible=False,
+                    )
+                    target_manual_kp_right = gr.Image(
+                        type="numpy",
+                        label="Keypoint Selection (right hand)",
+                        show_label=True,
+                        height=LENGTH,
+                        width=LENGTH,
+                        interactive=False,
+                        visible=False,
+                        sources=[],
+                    )
+                    with gr.Row():
+                        target_manual_undo_right = gr.Button(
+                            value="Undo", interactive=True, visible=False
+                        )
+                        target_manual_reset_right = gr.Button(
+                            value="Reset", interactive=True, visible=False
+                        )
+                    target_manual_kp_l_info = gr.Markdown(
+                        """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>left</b> hand. See \"OpenPose keypoint convention\" for guidance.</p>""",
+                        visible=False
+                    )
+                    target_manual_kp_left = gr.Image(
+                        type="numpy",
+                        label="Keypoint Selection (left hand)",
+                        show_label=True,
+                        height=LENGTH,
+                        width=LENGTH,
+                        interactive=False,
+                        visible=False,
+                        sources=[],
+                    )
+                    with gr.Row():
+                        target_manual_undo_left = gr.Button(
+                            value="Undo", interactive=True, visible=False
+                        )
+                        target_manual_reset_left = gr.Button(
+                            value="Reset", interactive=True, visible=False
+                        )
+                    target_manual_done_info = gr.Markdown(
+                        """<p style="text-align: center;"><b>Step 3.</b> Hit \"Done\" button to confirm.</p>""",
+                        visible=False,
+                    )
+                    target_manual_done = gr.Button(value="Done", interactive=True, visible=False)
+                    target_manual_pose = gr.Image(
+                        type="numpy",
+                        label="Target Pose",
+                        show_label=True,
+                        height=LENGTH,
+                        width=LENGTH,
+                        interactive=False,
+                        visible=False
+                    )
+                    target_use_manual = gr.Button(value="Click here to use manual, not automatic", interactive=True, visible=False)
+                    target_manual_instruct = gr.Markdown(
+                        value="""<p style="text-align: left; font-weight: bold; ">OpenPose Keypoints Convention</p>""",
+                        visible=True
+                    )
+                    target_manual_openpose = gr.Image(
+                        value="openpose.png",
+                        type="numpy",
+                        show_label=False,
+                        height=LENGTH // 2,
+                        width=LENGTH // 2,
+                        interactive=False,
+                        visible=True
+                    )
                 gr.Markdown(
                     """<p style="text-align: center;">&#9314; Optionally flip the hand</p>"""
                 )
                 target_flip = gr.Checkbox(
                     value=False, label="Flip Handedness (Target)", interactive=False
                 )
+            # result column
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">3. Press &quot;Run&quot; to get the edited results 🎯</p>"""
                     interactive=True,
                 )
+        # reference listeners
         ref.change(enable_component, [ref, ref], ref_finish_crop)
+        ref_finish_crop.click(prepare_anno, [ref], [ref_im_raw, ref_kp_raw])
         ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_right)
         ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_left)
+        ref_kp_raw.change(get_ref_anno, [ref_im_raw, ref_kp_raw], [ref_img, ref_pose, ref_auto_cond])
+        ref_pose.change(enable_component, [ref_kp_raw, ref_pose], ref_use_auto)
+        ref_pose.change(enable_component, [ref_img, ref_pose], ref_flip)
+        ref_auto_cond.change(lambda x: x, ref_auto_cond, ref_cond)
+        ref_use_auto.click(lambda x: x, ref_auto_cond, ref_cond)
+        # No inputs are bound to this listener, so the callback takes no arguments.
+        ref_use_auto.click(lambda: gr.Info("Automatic hand keypoints will be used for 'Reference'", duration=3))
         ref_manual_checkbox.select(
             set_visible,
             [ref_manual_checkbox, ref_kp_got, ref_im_raw, ref_manual_kp_right, ref_manual_kp_left, ref_manual_done],
         ref_manual_reset_left.click(
             reset_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
         )
+        ref_manual_done.click(visible_component, [gr.State(0), ref_manual_pose], ref_manual_pose)
+        ref_manual_done.click(visible_component, [gr.State(0), ref_use_manual], ref_use_manual)
         ref_manual_done.click(get_ref_anno, [ref_im_raw, ref_kp_got], [ref_img, ref_manual_pose, ref_manual_cond])
         ref_manual_pose.change(enable_component, [ref_manual_pose, ref_manual_pose], ref_manual_done)
         ref_manual_pose.change(enable_component, [ref_img, ref_manual_pose], ref_flip)
+        ref_manual_cond.change(lambda x: x, ref_manual_cond, ref_cond)
+        ref_use_manual.click(lambda x: x, ref_manual_cond, ref_cond)
+        # No inputs are bound to this listener, so the callback takes no arguments.
+        ref_use_manual.click(lambda: gr.Info("Manual hand keypoints will be used for 'Reference'", duration=3))
         ref_flip.select(
+            flip_hand,
+            [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond],
+            [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond]
         )
+        # target listeners
         target.change(enable_component, [target, target], target_finish_crop)
+        target_finish_crop.click(prepare_anno, [target], [target_im_raw, target_kp_raw])
+        target_kp_raw.change(lambda x:x, target_im_raw, target_manual_kp_right)
+        target_kp_raw.change(lambda x:x, target_im_raw, target_manual_kp_left)
+        target_kp_raw.change(get_target_anno, [target_im_raw, target_kp_raw], [target_img, target_pose, target_auto_cond, target_auto_keypts])
+        target_pose.change(enable_component, [target_kp_raw, target_pose], target_use_auto)
         target_pose.change(enable_component, [target_img, target_pose], target_flip)
+        target_auto_cond.change(lambda x: x, target_auto_cond, target_cond)
+        target_auto_keypts.change(lambda x: x, target_auto_keypts, target_keypts)
+        target_use_auto.click(lambda x: x, target_auto_cond, target_cond)
+        target_use_auto.click(lambda x: x, target_auto_keypts, target_keypts)
+        # No inputs are bound to this listener, so the callback takes no arguments.
+        target_use_auto.click(lambda: gr.Info("Automatic hand keypoints will be used for 'Target'", duration=3))
+        target_manual_checkbox.select(
+            set_visible,
+            [target_manual_checkbox, target_kp_got, target_im_raw, target_manual_kp_right, target_manual_kp_left, target_manual_done],
+            [
+                target_kp_got,
+                target_manual_kp_right,
+                target_manual_kp_left,
+                target_manual_kp_right,
+                target_manual_undo_right,
+                target_manual_reset_right,
+                target_manual_kp_left,
+                target_manual_undo_left,
+                target_manual_reset_left,
+                target_manual_kp_r_info,
+                target_manual_kp_l_info,
+                target_manual_done,
+                target_manual_done_info
+            ]
+        )
+        target_manual_kp_right.select(
+            get_kps, [target_im_raw, target_kp_got, gr.State("right")], [target_manual_kp_right, target_kp_got]
+        )
+        target_manual_undo_right.click(
+            undo_kps, [target_im_raw, target_kp_got, gr.State("right")], [target_manual_kp_right, target_kp_got]
+        )
+        target_manual_reset_right.click(
+            reset_kps, [target_im_raw, target_kp_got, gr.State("right")], [target_manual_kp_right, target_kp_got]
+        )
+        target_manual_kp_left.select(
+            get_kps, [target_im_raw, target_kp_got, gr.State("left")], [target_manual_kp_left, target_kp_got]
+        )
+        target_manual_undo_left.click(
+            undo_kps, [target_im_raw, target_kp_got, gr.State("left")], [target_manual_kp_left, target_kp_got]
+        )
+        target_manual_reset_left.click(
+            reset_kps, [target_im_raw, target_kp_got, gr.State("left")], [target_manual_kp_left, target_kp_got]
+        )
+        target_manual_done.click(visible_component, [gr.State(0), target_manual_pose], target_manual_pose)
+        target_manual_done.click(visible_component, [gr.State(0), target_use_manual], target_use_manual)
+        target_manual_done.click(get_target_anno, [target_im_raw, target_kp_got], [target_img, target_manual_pose, target_manual_cond, target_manual_keypts])
+        target_manual_pose.change(enable_component, [target_manual_pose, target_manual_pose], target_manual_done)
+        target_manual_pose.change(enable_component, [target_img, target_manual_pose], target_flip)
+        target_manual_cond.change(lambda x: x, target_manual_cond, target_cond)
+        target_manual_keypts.change(lambda x: x, target_manual_keypts, target_keypts)
+        target_use_manual.click(lambda x: x, target_manual_cond, target_cond)
+        target_use_manual.click(lambda x: x, target_manual_keypts, target_keypts)
+        # No inputs are bound to this listener, so the callback takes no arguments.
+        # The notice names 'Target' because this button belongs to the target column.
+        target_use_manual.click(lambda: gr.Info("Manual hand keypoints will be used for 'Target'", duration=3))
         target_flip.select(
             flip_hand,
+            [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_keypts, target_auto_keypts, target_manual_keypts],
+            [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_keypts, target_auto_keypts, target_manual_keypts],
         )
+        # run listeners
+        ref_cond.change(enable_component, [ref_cond, target_cond], run)
+        target_cond.change(enable_component, [ref_cond, target_cond], run)
+        # ref_manual_pose.change(enable_component, [ref_manual_pose, target_manual_pose], run)
+        # target_manual_pose.change(enable_component, [ref_manual_pose, target_manual_pose], run)
         run.click(
             sample_diff,
             [ref_cond, target_cond, target_keypts, n_generation, seed, cfg],
             [],
             [
                 ref,
+                ref_manual_checkbox,
                 ref_manual_kp_right,
                 ref_manual_kp_left,
+                ref_img,
                 ref_pose,
                 ref_manual_pose,
+                ref_cond,
                 ref_flip,
                 target,
+                target_keypts,
+                target_manual_checkbox,
+                target_manual_kp_right,
+                target_manual_kp_left,
+                target_img,
                 target_pose,
+                target_manual_pose,
+                target_cond,
                 target_flip,
                 results,
                 results_pose,
                 n_generation,
                 seed,
                 cfg,
                 ref_kp_raw,
             ],
         )
         clear.click(
             set_unvisible,
             [],
             [
                 ref_manual_kp_l_info,
+                ref_manual_kp_r_info,
+                ref_manual_kp_left,
+                ref_manual_kp_right,
                 ref_manual_undo_left,
                 ref_manual_undo_right,
                 ref_manual_reset_left,
                 ref_manual_done_info,
                 ref_manual_pose,
                 ref_use_manual,
+                target_manual_kp_l_info,
+                target_manual_kp_r_info,
+                target_manual_kp_left,
+                target_manual_kp_right,
+                target_manual_undo_left,
+                target_manual_undo_right,
+                target_manual_reset_left,
+                target_manual_reset_right,
+                target_manual_done,
+                target_manual_done_info,
+                target_manual_pose,
+                target_use_manual,
             ]
         )
     with gr.Tab("Fix Hands"):
         fix_inpaint_mask = gr.State(value=None)
         fix_original = gr.State(value=None)
+        fix_crop_coord = gr.State(value=None)
         fix_img = gr.State(value=None)
         fix_kpts = gr.State(value=None)
         fix_kpts_np = gr.State(value=None)
         fix_latent = gr.State(value=None)
         fix_inpaint_latent = gr.State(value=None)
         with gr.Row():
+            # crop & brush
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">1. Upload a malformed hand image to fix 📥</p>"""
                 )
                 gr.Markdown(
+                    """<p style="text-align: center;">&#9312; Optionally crop the image by clicking <b>top left</b> and <b>bottom right</b> of your desired bounding box around the hand. </p>"""
                 )
+                # fix_crop = gr.ImageEditor(
+                #     type="numpy",
+                #     sources=["upload", "webcam", "clipboard"],
+                #     label="Image crop",
+                #     show_label=True,
+                #     height=LENGTH,
+                #     width=LENGTH,
+                #     layers=False,
+                #     # crop_size="1:1",
+                #     transforms=(),
+                #     brush=False,
+                #     image_mode="RGBA",
+                #     container=False,
+                # )
+                fix_crop = gr.Image(
                     type="numpy",
                     sources=["upload", "webcam", "clipboard"],
+                    label="Input Image",
                     show_label=True,
                     height=LENGTH,
                     width=LENGTH,
+                    interactive=True,
+                    visible=True,
+                )
+                gr.Markdown(
+                    """<p style="text-align: center;">💡 If you crop, the model can focus on more details of the cropped area. Square crops might work better than rectangle crops.</p>"""
                 )
+                # fix_tmp = gr.Image(
+                #     type="numpy",
+                #     label="tmp",
+                #     show_label=True,
+                #     height=LENGTH,
+                #     width=LENGTH,
+                #     interactive=True,
+                #     visible=True,
+                #     sources=[],
+                # )
                 fix_example = gr.Examples(
                     fix_example_imgs,
                     inputs=[fix_crop],
                     examples_per_page=20,
                 )
                 gr.Markdown(
+                    """<p style="text-align: center;">&#9313; Brush area (e.g., wrong finger) that needs to be fixed. Don't brush the entire hand!</p>"""
                 )
                 fix_ref = gr.ImageEditor(
                     type="numpy",
+                    label="Image Brushing",
                     sources=(),
                     show_label=True,
                     height=LENGTH,
                     container=False,
                     interactive=False,
                 )
+                gr.Markdown(
+                    """<p style="text-align: center;">&#9314; Hit the \"Finish Cropping & Brushing\" button</p>"""
+                )
                 # Label spelling matches the button name quoted in the step-3 instruction text.
                 fix_finish_crop = gr.Button(
                     value="Finish Cropping & Brushing", interactive=False
                 )
+            # keypoint selection
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">2. Click on hand to get target hand pose</p>"""
                     show_label=False,
                     interactive=False,
                 )
                 fix_kp_r_info = gr.Markdown(
+                    """<p style="text-align: center;">&#9313; Click 21 keypoints on the image to provide the target hand pose of <b>right hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
+                    visible=False
                 )
+                # fix_kp_r_info = gr.Markdown(
+                    # """<p style="text-align: center; font-size: 20px; font-weight: bold; ">Select right only</p>""",
+                    # visible=False,
+                # )
                 fix_kp_right = gr.Image(
                     type="numpy",
                     label="Keypoint Selection (right hand)",
                         value="Reset", interactive=False, visible=False
                     )
                 fix_kp_l_info = gr.Markdown(
+                    """<p style="text-align: center;">&#9313; Click 21 keypoints on the image to provide the target hand pose of <b>left hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
                     visible=False
                 )
                 fix_kp_left = gr.Image(
                     width=LENGTH // 2,
                     interactive=False,
                 )
+            # get latent
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">3. Press &quot;Ready&quot; to start pre-processing</p>"""
                 )
                 fix_ready = gr.Button(value="Ready", interactive=False)
                 gr.Markdown(
+                    """<p style="text-align: center; font-weight: bold; ">Visualized (256, 256)-resized, brushed image</p>"""
                 )
                 fix_vis_mask32 = gr.Image(
                     type="numpy",
                     width=opts.image_size,
                     interactive=False,
                 )
+                # gr.Markdown(
+                #     """<p style="text-align: center;">[NOTE] Above should be inpaint mask that you brushed, NOT the segmentation mask of the entire hand. </p>"""
+                # )
+            # result column
             with gr.Column():
                 gr.Markdown(
                     """<p style="text-align: center; font-size: 20px; font-weight: bold;">4. Press &quot;Run&quot; to get the fixed hand image 🎯</p>"""
                 gr.Markdown(
                     """<p style="text-align: center;">⚠️  >3min and ~24GB per generation</p>"""
                 )
+                fix_result_original = gr.Gallery(
+                    type="numpy",
+                    label="Results on original input",
+                    show_label=True,
+                    height=LENGTH,
+                    min_width=LENGTH,
+                    columns=FIX_MAX_N,
+                    interactive=False,
+                    preview=True,
+                )
                 fix_result = gr.Gallery(
                     type="numpy",
                     label="Results",
                 )
                 fix_clear = gr.ClearButton()
+        with gr.Tab("More options"):
+            gr.Markdown(
+                "⚠️ Currently, Number of generation > 1 could lead to out-of-memory"
             )
+            with gr.Row():
+                fix_n_generation = gr.Slider(
+                    label="Number of generations",
+                    value=1,
+                    minimum=1,
+                    maximum=FIX_MAX_N,
+                    step=1,
+                    randomize=False,
+                    interactive=True,
+                )
+                fix_seed = gr.Slider(
+                    label="Seed",
+                    value=42,
+                    minimum=0,
+                    maximum=10000,
+                    step=1,
+                    randomize=False,
+                    interactive=True,
+                )
+                fix_cfg = gr.Slider(
+                    label="Classifier free guidance scale",
+                    value=3.0,
+                    minimum=0.0,
+                    maximum=10.0,
+                    step=0.1,
+                    randomize=False,
+                    interactive=True,
+                )
+                fix_quality = gr.Slider(
+                    label="Quality",
+                    value=10,
+                    minimum=1,
+                    maximum=10,
+                    step=1,
+                    randomize=False,
+                    interactive=True,
+                )
+        # listeners
+        # fix_crop.change(resize_to_full, fix_crop, fix_ref)
+        fix_crop.change(lambda x: x, fix_crop, fix_original)  # fix_original: (real_H, real_W, 3)
+        fix_crop.change(stay_crop, [fix_crop, fix_crop_coord], [fix_crop_coord, fix_ref])
+        fix_crop.select(process_crop, [fix_crop, fix_crop_coord], [fix_crop_coord, fix_ref])
+        # fix_ref.change(disable_crop, fix_crop_coord, fix_crop)
+        fix_ref.change(enable_component, [fix_crop, fix_crop], fix_ref)
+        fix_ref.change(enable_component, [fix_crop, fix_crop], fix_finish_crop)
+        fix_finish_crop.click(visualize_ref, [fix_ref], [fix_img])
+        fix_finish_crop.click(get_mask_inpaint, [fix_ref], [fix_inpaint_mask])  # fix_ref: (real_cropped_H, real_cropped_W, 3)
         fix_img.change(lambda x: x, [fix_img], [fix_kp_right])
         fix_img.change(lambda x: x, [fix_img], [fix_kp_left])
         fix_inpaint_mask.change(
             ],
         )
         fix_kp_right.select(
+            get_kps, [fix_img, fix_kpts, gr.State("right")], [fix_kp_right, fix_kpts]  # fix_img: (real_cropped_H, real_cropped_W, 3)
         )
         fix_undo_right.click(
             undo_kps, [fix_img, fix_kpts, gr.State("right")], [fix_kp_right, fix_kpts]
         )
         fix_ready.click(
             ready_sample,
+            [fix_ref, fix_inpaint_mask, fix_kpts],
             [
                 fix_ref_cond,
                 fix_target_cond,
                 fix_latent,
                 fix_inpaint_latent,
                 fix_kpts_np,
+                fix_original,
+                fix_crop_coord,
                 fix_n_generation,
                 fix_seed,
                 fix_cfg,
                 fix_quality,
             ],
+            [fix_result, fix_result_pose, fix_result_original],
         )
         fix_clear.click(
             fix_clear_all,
             [],
             [
                 fix_crop,
+                fix_crop_coord,
                 fix_ref,
+                fix_checkbox,
                 fix_kp_right,
                 fix_kp_left,
                 fix_result,
                 fix_result_pose,
+                fix_result_original,
                 fix_inpaint_mask,
                 fix_original,
                 fix_img,
                 fix_quality,
             ],
         )
+        fix_clear.click(
+            fix_set_unvisible,
+            [],
+            [
+                fix_kp_right,
+                fix_kp_left,
+                fix_kp_r_info,
+                fix_kp_l_info,
+                fix_undo_left,
+                fix_undo_right,
+                fix_reset_left,
+                fix_reset_right
+            ]
+        )
     gr.Markdown("<h1>Citation</h1>")
     gr.Markdown(

brown_logo.png ADDED Viewed

Git LFS Details

SHA256: 654ac3b7a615ed09cfaaf1cb0bc1d8a53051a42598fb3cba3e5620ba255e6a7c
Pointer size: 130 Bytes
Size of remote file: 35.8 kB

meta_logo.png ADDED Viewed

Git LFS Details

SHA256: d573af322a5fd721558b0d677dc963213d2696696b23e999179df3144ba6271b
Pointer size: 130 Bytes
Size of remote file: 21.6 kB

sbatch/sbatch_demo.sh ADDED Viewed

	@@ -0,0 +1,38 @@

+#!/bin/bash
+# job name
+#SBATCH -J demo_foundhand
+# partition
+#SBATCH --partition=ssrinath-gcondo --gres=gpu:1 --gres-flags=enforce-binding
+#SBATCH --account=ssrinath-gcondo
+# ensures all allocated cores are on the same node
+#SBATCH -N 1
+# cpu cores
+#SBATCH --ntasks-per-node=4
+# memory per node
+#SBATCH --mem=32G
+# runtime
+#SBATCH -t 240:00:00
+# output
+#SBATCH -o out/demo.out
+# error
+#SBATCH -e err/demo.err
+# email notification
+# SBATCH --mail-type=ALL
+module load miniconda3/23.11.0s
+source /oscar/runtime/software/external/miniconda3/23.11.0/etc/profile.d/conda.sh
+conda activate handdiff
+cd $HOME/hdd/FoundHand_demo
+echo Directory is `pwd`
+python -u app.py

vqvae.py CHANGED Viewed

@@ -20,7 +20,10 @@ from typing import List
 import torch
 import torch.nn.functional as F
 from torch import nn
-import spaces
 class Autoencoder(nn.Module):

 import torch
 import torch.nn.functional as F
 from torch import nn
+try:
+    import spaces
+except:
+    pass
 class Autoencoder(nn.Module):