Barvero committed on
Commit
2904b62
·
verified ·
1 Parent(s): 6f22828

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Module setup: load the CLIP model and the precomputed image-embedding index.
# Everything below runs once at import time, before the Gradio app starts.

# Import core libraries
import numpy as np
import pandas as pd
import torch
import gradio as gr

# Import CLIP model and processor
from transformers import CLIPModel, CLIPProcessor

# Import image handling
from PIL import Image


# Select device (GPU if available, otherwise CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"


# Load pretrained CLIP model and its matching preprocessor
# (downloads from the Hugging Face hub on first run)
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Move model to device and set evaluation mode (disables dropout etc.)
model = model.to(device)
model.eval()


# Load precomputed embeddings from file.
# Expected schema: one "image_id" column plus one column per embedding dimension.
emb_df = pd.read_parquet("clip_embeddings_3000.parquet")

# Extract image identifiers.
# NOTE(review): these ids are later passed straight to Image.open() in
# recommend(), so they are presumably file paths — confirm upstream.
sampled_ids = emb_df["image_id"].values

# Extract embeddings matrix, shape (n_images, dim).
# Assumed to be L2-normalized upstream so that a plain dot product is
# cosine similarity — TODO confirm; nothing here re-normalizes.
embeddings = emb_df.drop(columns=["image_id"]).values.astype("float32")


# Attempt to load sampled indices (optional optimization).
# NOTE(review): sampled_indices is never read in this file; it may be
# consumed elsewhere or be dead state.
try:
    sampled_indices = np.load("sampled_indices_3000.npy")
except Exception:
    sampled_indices = None
44
def embed_image(image: Image.Image):
    """Encode a PIL image into a unit-length CLIP embedding.

    Returns a 1-D float numpy vector, L2-normalized so that dot products
    against other normalized embeddings are cosine similarities.
    """
    # Preprocess for CLIP and move every tensor to the model's device
    batch = processor(images=image, return_tensors="pt")
    batch = {name: tensor.to(device) for name, tensor in batch.items()}

    # Forward pass without autograd bookkeeping
    with torch.no_grad():
        features = model.get_image_features(**batch)

    # Pull the single embedding out of the batch and L2-normalize it
    raw = features.cpu().numpy()[0]
    return raw / np.linalg.norm(raw)
59
+
60
+
61
def recommend(image):
    """Return up to 3 dataset images most visually similar to the upload.

    Args:
        image: PIL image from the Gradio input, or None when the input
            is cleared.

    Returns:
        A list of PIL RGB images (possibly empty) for the Gallery output.
    """
    # Gradio passes None when the user clears the input — nothing to rank.
    if image is None:
        return []

    # Embed the user's image as a unit vector
    user_vec = embed_image(image)

    # Cosine similarity against every precomputed embedding
    # (valid as a plain dot product only if both sides are L2-normalized)
    scores = embeddings @ user_vec

    # Rank candidates; fetch one extra so a self-match can be dropped
    candidate_idx = np.argsort(scores)[::-1][:4]

    # Bug fix: the original always discarded the top hit, assuming it was
    # the query itself. That is only true when the upload is already in the
    # dataset; for a novel image it threw away the best match. Instead,
    # drop only near-exact duplicates (cosine ~ 1.0) and keep the top 3.
    top_idx = [i for i in candidate_idx if scores[i] < 0.9999][:3]

    # Load the recommended images from disk
    # (sampled_ids are assumed to be image file paths — confirm upstream)
    results = []
    for idx in top_idx:
        img = Image.open(sampled_ids[idx]).convert("RGB")
        results.append(img)

    return results
82
+
83
+
84
# Build the Gradio UI: a single image in, a gallery of recommendations out.
image_input = gr.Image(type="pil", label="Upload an image")
gallery_output = gr.Gallery(label="Top-3 Recommended Images")

demo = gr.Interface(
    fn=recommend,
    inputs=image_input,
    outputs=gallery_output,
    title="CLIP Image Recommendation System",
    description="Upload an image and receive visually similar product recommendations.",
)

# Start the web application (blocks until the server is stopped).
demo.launch()