Spaces:

methodw
/

xbgp

Running

App Files Files

methodw committed on Dec 19, 2025

Commit

80bed1b

verified ·

1 Parent(s): fb25759

switch to dinov3

Browse files

Files changed (3) hide show

.gitattributes +1 -0
app.py +16 -20
xbgp-faiss.index +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1 @@


1	+ xbgp-faiss.index filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -6,13 +6,15 @@ import json
 import numpy as np
 import faiss
 # Init similarity search AI model and processor
 device = torch.device("cpu")
-processor = AutoImageProcessor.from_pretrained("facebook/dinov2-large")
-model = AutoModel.from_pretrained("facebook/dinov2-large")
 model.config.return_dict = False  # Set return_dict to False for JIT tracing
 model.to(device)
 # Prepare an example input for tracing
 example_input = torch.rand(1, 3, 224, 224).to(device)  # Adjust size if needed
@@ -29,12 +31,8 @@ with open("xbgp-faiss-map.json", "r") as f:
 def process_image(image):
     """
-    Process the image and extract features using the DINOv2 model.
     """
-    # Add your image processing code here.
-    # This will include preprocessing the image, passing it through the model,
-    # and then formatting the output (extracted features).
     # Convert to RGB if it isn't already
     if image.mode != "RGB":
         image = image.convert("RGB")
@@ -49,31 +47,27 @@ def process_image(image):
         h_percent = 224 / float(height)
         new_height = 224
         new_width = int(float(width) * float(h_percent))
-    image = image.resize((new_width, new_height), Image.LANCZOS)
     # Extract the features from the uploaded image
     with torch.no_grad():
         inputs = processor(images=image, return_tensors="pt")["pixel_values"].to(device)
-        # Use the traced model for inference
         outputs = traced_model(inputs)
-    # Normalize the features before search, whatever that means
-    embeddings = outputs[0].mean(dim=1)
-    vector = embeddings.detach().cpu().numpy()
-    vector = np.float32(vector)
-    faiss.normalize_L2(vector)
     # Read the index file and perform search of top 50 images
     distances, indices = index.search(vector, 50)
     matches = []
     for idx, matching_gamerpic in enumerate(indices[0]):
         gamerpic = {}
         gamerpic["id"] = images[matching_gamerpic]
         gamerpic["score"] = str(round((1 / (distances[0][idx] + 1) * 100), 2)) + "%"
         matches.append(gamerpic)
     return matches
@@ -82,8 +76,10 @@ def process_image(image):
 # Create a Gradio interface
 iface = gr.Interface(
     fn=process_image,
-    inputs=gr.Image(type="pil"),  # Adjust the shape as needed
-    outputs="json",  # Or any other output format that suits your needs
 ).queue()
 # Launch the Gradio app

 import numpy as np
 import faiss
 # Init similarity search AI model and processor
 device = torch.device("cpu")
+processor = AutoImageProcessor.from_pretrained(
+    "facebook/dinov3-vitb16-pretrain-lvd1689m"
+)
+model = AutoModel.from_pretrained("facebook/dinov3-vitb16-pretrain-lvd1689m")
 model.config.return_dict = False  # Set return_dict to False for JIT tracing
 model.to(device)
+model.eval()  # Set model to evaluation mode for inference
 # Prepare an example input for tracing
 example_input = torch.rand(1, 3, 224, 224).to(device)  # Adjust size if needed
 def process_image(image):
     """
+    Process the image and extract features using the DINOv3 model.
     """
     # Convert to RGB if it isn't already
     if image.mode != "RGB":
         image = image.convert("RGB")
         h_percent = 224 / float(height)
         new_height = 224
         new_width = int(float(width) * float(h_percent))
+    image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
     # Extract the features from the uploaded image
     with torch.no_grad():
         inputs = processor(images=image, return_tensors="pt")["pixel_values"].to(device)
         outputs = traced_model(inputs)
+        # Normalize the features before search
+        embeddings = outputs[0].mean(dim=1)
+        vector = embeddings.detach().cpu().numpy()
+        vector = np.float32(vector)
+        faiss.normalize_L2(vector)
     # Read the index file and perform search of top 50 images
     distances, indices = index.search(vector, 50)
     matches = []
     for idx, matching_gamerpic in enumerate(indices[0]):
         gamerpic = {}
         gamerpic["id"] = images[matching_gamerpic]
         gamerpic["score"] = str(round((1 / (distances[0][idx] + 1) * 100), 2)) + "%"
         matches.append(gamerpic)
     return matches
 # Create a Gradio interface
 iface = gr.Interface(
     fn=process_image,
+    inputs=gr.Image(type="pil"),
+    outputs="json",
+    title="Xbox Gamerpic Finder - DINOv3",
+    description="Upload an image to find similar Xbox 360 gamerpics using Meta's DINOv3 vision model",
 ).queue()
 # Launch the Gradio app

xbgp-faiss.index ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf0c98cc8548885e189144a32cb31c352cd691ff4b8194d592bedb1160544ec0
+size 100042797