Spaces:

panda1835
/

dinov2_embedding

Running

panda1835 committed on Jan 29, 2024

Commit

4f13ac7

verified ·

1 Parent(s): 59d3daf

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+import torch
+import torchvision.transforms as T
+import numpy as np
+from PIL import Image
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# DINOv2
+# Select checkpoint
+dinov2_ckpt = ['dinov2_vits14', 'dinov2_vitb14', 'dinov2_vitl14', 'dinov2_vitg14'][1]
+dinov2 = torch.hub.load('facebookresearch/dinov2', dinov2_ckpt)
+dinov2.to(device)
+print()
+transform_image = T.Compose([
+    T.Resize((224, 224)),
+    T.ToTensor(),
+    T.Normalize(mean=[0.485, 0.456, 0.406],
+                        std=[0.229, 0.224, 0.225])
+])
+def predict(image):
+  """
+  Predict the identity of an image.
+  Args:
+    image: A PIL Image object.
+  Returns:
+    A string representing the predicted identity of the image.
+  """
+  # Convert the image to a tensor.
+  transformed_img = transform_image(image)[:3].unsqueeze(0).to(device)
+  # Get the embedding of the image.
+  with torch.no_grad():
+    embedding = dinov2(transformed_img)
+    print(embedding.shape)
+    embedding = embedding[0].cpu().numpy().tolist()
+    print(embedding)
+  return {
+      "embedding": embedding
+  }
+# Create a Gradio interface.
+interface = gr.Interface(
+    fn=predict,
+    inputs=[gr.Image(type='pil')],
+    outputs=[gr.JSON()],
+    title="DINOv2 Image Retrieval",
+)
+# Start the Gradio server.
+interface.launch()