methodw committed on
Commit
80bed1b
·
verified ·
1 Parent(s): fb25759

switch to dinov3

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. app.py +16 -20
  3. xbgp-faiss.index +3 -0
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ xbgp-faiss.index filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -6,13 +6,15 @@ import json
6
  import numpy as np
7
  import faiss
8
 
9
-
10
  # Init similarity search AI model and processor
11
  device = torch.device("cpu")
12
- processor = AutoImageProcessor.from_pretrained("facebook/dinov2-large")
13
- model = AutoModel.from_pretrained("facebook/dinov2-large")
 
 
14
  model.config.return_dict = False # Set return_dict to False for JIT tracing
15
  model.to(device)
 
16
 
17
  # Prepare an example input for tracing
18
  example_input = torch.rand(1, 3, 224, 224).to(device) # Adjust size if needed
@@ -29,12 +31,8 @@ with open("xbgp-faiss-map.json", "r") as f:
29
 
30
  def process_image(image):
31
  """
32
- Process the image and extract features using the DINOv2 model.
33
  """
34
- # Add your image processing code here.
35
- # This will include preprocessing the image, passing it through the model,
36
- # and then formatting the output (extracted features).
37
-
38
  # Convert to RGB if it isn't already
39
  if image.mode != "RGB":
40
  image = image.convert("RGB")
@@ -49,31 +47,27 @@ def process_image(image):
49
  h_percent = 224 / float(height)
50
  new_height = 224
51
  new_width = int(float(width) * float(h_percent))
52
- image = image.resize((new_width, new_height), Image.LANCZOS)
53
 
54
  # Extract the features from the uploaded image
55
  with torch.no_grad():
56
  inputs = processor(images=image, return_tensors="pt")["pixel_values"].to(device)
57
-
58
- # Use the traced model for inference
59
  outputs = traced_model(inputs)
60
 
61
- # Normalize the features before search, whatever that means
62
- embeddings = outputs[0].mean(dim=1)
63
- vector = embeddings.detach().cpu().numpy()
64
- vector = np.float32(vector)
65
- faiss.normalize_L2(vector)
66
 
67
  # Read the index file and perform search of top 50 images
68
  distances, indices = index.search(vector, 50)
69
 
70
  matches = []
71
-
72
  for idx, matching_gamerpic in enumerate(indices[0]):
73
  gamerpic = {}
74
  gamerpic["id"] = images[matching_gamerpic]
75
  gamerpic["score"] = str(round((1 / (distances[0][idx] + 1) * 100), 2)) + "%"
76
-
77
  matches.append(gamerpic)
78
 
79
  return matches
@@ -82,8 +76,10 @@ def process_image(image):
82
  # Create a Gradio interface
83
  iface = gr.Interface(
84
  fn=process_image,
85
- inputs=gr.Image(type="pil"), # Adjust the shape as needed
86
- outputs="json", # Or any other output format that suits your needs
 
 
87
  ).queue()
88
 
89
  # Launch the Gradio app
 
6
  import numpy as np
7
  import faiss
8
 
 
9
  # Init similarity search AI model and processor
10
  device = torch.device("cpu")
11
+ processor = AutoImageProcessor.from_pretrained(
12
+ "facebook/dinov3-vitb16-pretrain-lvd1689m"
13
+ )
14
+ model = AutoModel.from_pretrained("facebook/dinov3-vitb16-pretrain-lvd1689m")
15
  model.config.return_dict = False # Set return_dict to False for JIT tracing
16
  model.to(device)
17
+ model.eval() # Set model to evaluation mode for inference
18
 
19
  # Prepare an example input for tracing
20
  example_input = torch.rand(1, 3, 224, 224).to(device) # Adjust size if needed
 
31
 
32
  def process_image(image):
33
  """
34
+ Process the image and extract features using the DINOv3 model.
35
  """
 
 
 
 
36
  # Convert to RGB if it isn't already
37
  if image.mode != "RGB":
38
  image = image.convert("RGB")
 
47
  h_percent = 224 / float(height)
48
  new_height = 224
49
  new_width = int(float(width) * float(h_percent))
50
+ image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
51
 
52
  # Extract the features from the uploaded image
53
  with torch.no_grad():
54
  inputs = processor(images=image, return_tensors="pt")["pixel_values"].to(device)
 
 
55
  outputs = traced_model(inputs)
56
 
57
+ # Normalize the features before search
58
+ embeddings = outputs[0].mean(dim=1)
59
+ vector = embeddings.detach().cpu().numpy()
60
+ vector = np.float32(vector)
61
+ faiss.normalize_L2(vector)
62
 
63
  # Read the index file and perform search of top 50 images
64
  distances, indices = index.search(vector, 50)
65
 
66
  matches = []
 
67
  for idx, matching_gamerpic in enumerate(indices[0]):
68
  gamerpic = {}
69
  gamerpic["id"] = images[matching_gamerpic]
70
  gamerpic["score"] = str(round((1 / (distances[0][idx] + 1) * 100), 2)) + "%"
 
71
  matches.append(gamerpic)
72
 
73
  return matches
 
76
  # Create a Gradio interface
77
  iface = gr.Interface(
78
  fn=process_image,
79
+ inputs=gr.Image(type="pil"),
80
+ outputs="json",
81
+ title="Xbox Gamerpic Finder - DINOv3",
82
+ description="Upload an image to find similar Xbox 360 gamerpics using Meta's DINOv3 vision model",
83
  ).queue()
84
 
85
  # Launch the Gradio app
xbgp-faiss.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf0c98cc8548885e189144a32cb31c352cd691ff4b8194d592bedb1160544ec0
3
+ size 100042797