Spaces:

odunkel
/

DIY-SC

Running on Zero

App Files Files Community

odunkel committed on Jun 25, 2025

Commit

fd625c4

verified ·

1 Parent(s): 98aae95

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -9

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import os
 import spaces
 import gradio as gr
 import numpy as np
 import torch
 import torch.nn as nn
 from PIL import Image, ImageDraw
@@ -54,21 +55,22 @@ def resize(img, target_res=224, resize=True, to_pil=True, edge=False, sampling_f
     return canvas
 # ─── Feature extraction ──────────────────────────────────────────
-@spaces.GPU(duration=20)
 def get_processed_features_dino(num_patches, img,use_dummy):
-    batch = extractor_vit.preprocess_pil(img)
-    features_dino = extractor_vit.extract_descriptors(batch.to(extractor_vit.device), layer=11, facet='token') \
-                                    .permute(0,1,3,2) \
-                                    .reshape(1, -1, num_patches, num_patches)
-    # Project + normalize
     with torch.no_grad():
         if use_dummy == "DINOv2":
             desc = aggre_net_dummy(features_dino)
         else:
             desc = aggre_net(features_dino)
         norms = torch.linalg.norm(desc, dim=1, keepdim=True)
         desc = desc / (norms + 1e-8)
-    desc = desc.cpu()
     torch.cuda.empty_cache()
     return desc  # shape [1, C, num_patches, num_patches]
@@ -86,7 +88,6 @@ def get_sim(
     y, x = coord  # row, col
     # Upsample both feature maps to [1, C, img_size, img_size]
-    upsampler = nn.Upsample(size=(img_size, img_size), mode='bilinear', align_corners=False)
     src_ft = upsampler(feat1)  # [1, C, img_size, img_size]
     trg_ft = upsampler(feat2)
@@ -176,7 +177,7 @@ def reload_img(
 # ─── Configuration ───────────────────────────────────────────────
-num_patches = 45
 target_res = num_patches * 14
 ckpt_file = "./ckpts/dino_spair_0300.pth"
@@ -188,6 +189,11 @@ aggre_net.load_pretrained_weights(torch.load(ckpt_file, map_location=device))
 aggre_net_dummy  = DummyAggregationNetwork()
 extractor_vit = ViTExtractor('dinov2_vitb14', stride=14, device=device)
 # ─── Build Gradio UI ──────────────────────────────────────────────
 with gr.Blocks() as demo:
     # Hidden states to hold features

 import spaces
 import gradio as gr
 import numpy as np
+import gc
 import torch
 import torch.nn as nn
 from PIL import Image, ImageDraw
     return canvas
 # ─── Feature extraction ──────────────────────────────────────────
+@spaces.GPU(duration=0)
 def get_processed_features_dino(num_patches, img,use_dummy):
     with torch.no_grad():
+        batch = extractor_vit.preprocess_pil(img)
+        features_dino = extractor_vit.extract_descriptors(batch.to(extractor_vit.device), layer=11, facet='token') \
+                                        .permute(0,1,3,2) \
+                                        .reshape(1, -1, num_patches, num_patches)
         if use_dummy == "DINOv2":
             desc = aggre_net_dummy(features_dino)
         else:
             desc = aggre_net(features_dino)
         norms = torch.linalg.norm(desc, dim=1, keepdim=True)
         desc = desc / (norms + 1e-8)
+    desc = desc.cpu().detach()
+    del batch, features_dino
+    gc.collect()
     torch.cuda.empty_cache()
     return desc  # shape [1, C, num_patches, num_patches]
     y, x = coord  # row, col
     # Upsample both feature maps to [1, C, img_size, img_size]
     src_ft = upsampler(feat1)  # [1, C, img_size, img_size]
     trg_ft = upsampler(feat2)
 # ─── Configuration ───────────────────────────────────────────────
+num_patches = 30
 target_res = num_patches * 14
 ckpt_file = "./ckpts/dino_spair_0300.pth"
 aggre_net_dummy  = DummyAggregationNetwork()
 extractor_vit = ViTExtractor('dinov2_vitb14', stride=14, device=device)
+aggre_net = aggre_net.eval()
+extractor_vit.model.eval()
+upsampler = nn.Upsample(size=(target_res, target_res), mode='bilinear', align_corners=False)
 # ─── Build Gradio UI ──────────────────────────────────────────────
 with gr.Blocks() as demo:
     # Hidden states to hold features