Barvero committed on
Commit
89e8538
·
verified ·
1 Parent(s): 417ecca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -34
app.py CHANGED
@@ -4,20 +4,21 @@ import pandas as pd
4
  import torch
5
  import gradio as gr
6
 
 
 
 
7
  # Import CLIP model and processor
8
  from transformers import CLIPModel, CLIPProcessor
9
 
10
- # Import image handling
11
- from PIL import Image
12
-
13
 
14
  # Select device (GPU if available)
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
16
 
17
 
18
  # Load pretrained CLIP model
19
- model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
20
- processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
21
 
22
  # Move model to device and set evaluation mode
23
  model = model.to(device)
@@ -27,24 +28,22 @@ model.eval()
27
  # Load precomputed embeddings from file
28
  emb_df = pd.read_parquet("clip_embeddings_3000.parquet")
29
 
30
- # Extract image identifiers
31
- sampled_ids = emb_df["image_id"].values
32
-
33
  # Extract normalized embeddings matrix
34
- embeddings = emb_df.drop(columns=["image_id"]).values.astype("float32")
35
 
36
 
37
- # Attempt to load sampled indices (optional optimization)
38
- try:
39
- sampled_indices = np.load("sampled_indices_3000.npy")
40
- except Exception:
41
- sampled_indices = None
 
42
 
43
 
44
  # Convert a user image into a normalized CLIP embedding
45
- def embed_image(image: Image.Image):
46
  # Preprocess image for CLIP
47
- inputs = processor(images=image, return_tensors="pt")
48
  inputs = {k: v.to(device) for k, v in inputs.items()}
49
 
50
  # Extract image features without gradients
@@ -52,42 +51,51 @@ def embed_image(image: Image.Image):
52
  features = model.get_image_features(**inputs)
53
 
54
  # Convert embedding to numpy and normalize
55
- vec = features.cpu().numpy()[0]
56
- vec = vec / np.linalg.norm(vec)
57
 
58
  return vec
59
 
60
 
61
  # Recommend top-3 visually similar images
62
  def recommend(image):
63
- # Embed user input image
64
- user_vec = embed_image(image)
 
 
 
 
65
 
66
- # Compute cosine similarity scores
67
- scores = embeddings @ user_vec
 
68
 
69
- # Retrieve top-4 results (including query itself)
70
- top_idx = np.argsort(scores)[::-1][:4]
71
 
72
- # Remove the first result (query image)
73
- top_idx = top_idx[1:]
 
 
 
74
 
75
- # Load recommended images
76
- results = []
77
- for idx in top_idx:
78
- img = Image.open(sampled_ids[idx]).convert("RGB")
79
- results.append(img)
80
 
81
- return results
 
82
 
83
 
84
  # Define Gradio interface
85
  demo = gr.Interface(
86
  fn=recommend,
87
  inputs=gr.Image(type="pil", label="Upload an image"),
88
- outputs=gr.Gallery(label="Top-3 Recommended Images"),
 
 
 
89
  title="CLIP Image Recommendation System",
90
- description="Upload an image and receive visually similar product recommendations."
 
91
  )
92
 
93
 
 
4
  import torch
5
  import gradio as gr
6
 
7
+ # Import dataset loader
8
+ from datasets import load_dataset
9
+
10
  # Import CLIP model and processor
11
  from transformers import CLIPModel, CLIPProcessor
12
 
 
 
 
13
 
14
# Select device (GPU if available)
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load pretrained CLIP model and processor
MODEL_NAME = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(MODEL_NAME)
processor = CLIPProcessor.from_pretrained(MODEL_NAME)

# Move model to device and set evaluation mode
model = model.to(device)
model.eval()

# Load precomputed embeddings from file
emb_df = pd.read_parquet("clip_embeddings_3000.parquet")

# Extract normalized embeddings matrix (rows align with sampled_dataset order)
embeddings = emb_df.drop(columns=["image_id"]).values.astype(np.float32)

# Load sampled indices (required to fetch the same 3000 images)
sampled_indices = np.load("sampled_indices_3000.npy").astype(int).tolist()

# Load dataset and select the sampled subset
ds = load_dataset("JamieSJS/stanford-online-products", "corpus")["corpus"]
sampled_dataset = ds.select(sampled_indices)
42
 
43
# Convert a user image into a normalized CLIP embedding
def embed_image(image):
    """Embed a PIL image with CLIP and L2-normalize the result.

    Parameters
    ----------
    image : PIL.Image.Image
        The user-supplied query image.

    Returns
    -------
    np.ndarray
        A 1-D float32 vector of (approximately) unit length, suitable for
        cosine similarity via a plain dot product.
    """
    # Preprocess image for CLIP
    inputs = processor(images=[image], return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Extract image features without gradients — inference only, so skip
    # building the autograd graph (saves memory per request)
    with torch.no_grad():
        features = model.get_image_features(**inputs)

    # Convert embedding to numpy and normalize (epsilon guards divide-by-zero)
    vec = features.cpu().numpy().reshape(-1).astype(np.float32)
    vec = vec / (np.linalg.norm(vec) + 1e-12)

    return vec
58
 
59
 
60
  # Recommend top-3 visually similar images
61
  def recommend(image):
62
+ try:
63
+ # Embed user input image
64
+ user_vec = embed_image(image)
65
+
66
+ # Compute cosine similarity scores
67
+ scores = embeddings @ user_vec
68
 
69
+ # Get Top-3 indices
70
+ top_idx = np.argsort(scores)[::-1][:3]
71
+ top_scores = scores[top_idx]
72
 
73
+ # Fetch images directly from the sampled dataset
74
+ results = [sampled_dataset[int(i)]["image"] for i in top_idx]
75
 
76
+ # Optional: return a short message for visibility
77
+ msg = (
78
+ f"Top-3 cosine similarity scores: "
79
+ f"{top_scores[0]:.3f}, {top_scores[1]:.3f}, {top_scores[2]:.3f}"
80
+ )
81
 
82
+ return results, msg
 
 
 
 
83
 
84
+ except Exception as e:
85
+ return [], f"Error: {str(e)}"
86
 
87
 
88
  # Define Gradio interface
89
  demo = gr.Interface(
90
  fn=recommend,
91
  inputs=gr.Image(type="pil", label="Upload an image"),
92
+ outputs=[
93
+ gr.Gallery(label="Top-3 Recommended Images"),
94
+ gr.Textbox(label="Details"),
95
+ ],
96
  title="CLIP Image Recommendation System",
97
+ description="Upload an image and receive visually similar product recommendations.",
98
+ allow_flagging="never",
99
  )
100
 
101