Spaces:
Runtime error
Runtime error
Commit
·
dec5315
1
Parent(s):
a848b0f
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,30 +5,11 @@ import torch
|
|
| 5 |
from transformers import AutoTokenizer, AutoModel
|
| 6 |
import faiss
|
| 7 |
import numpy as np
|
| 8 |
-
import wget
|
| 9 |
from PIL import Image
|
| 10 |
from sentence_transformers import SentenceTransformer
|
| 11 |
import json
|
| 12 |
-
from zipfile import ZipFile
|
| 13 |
import zipfile
|
| 14 |
|
| 15 |
-
# Load the pre-trained sentence encoder
|
| 16 |
-
model_name = "sentence-transformers/all-distilroberta-v1"
|
| 17 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 18 |
-
model = SentenceTransformer(model_name)
|
| 19 |
-
|
| 20 |
-
# Define the path to the zip folder containing the images
|
| 21 |
-
zip_path = "Images.zip"
|
| 22 |
-
|
| 23 |
-
# Open the zip folder
|
| 24 |
-
zip_file = zipfile.ZipFile(zip_path)
|
| 25 |
-
|
| 26 |
-
vectors = np.load("./sbert_text_features.npy")
|
| 27 |
-
vector_dimension = vectors.shape[1]
|
| 28 |
-
index = faiss.IndexFlatL2(vector_dimension)
|
| 29 |
-
faiss.normalize_L2(vectors)
|
| 30 |
-
index.add(vectors)
|
| 31 |
-
|
| 32 |
# Map the image ids to the corresponding captions
|
| 33 |
image_map_name = 'captions.json'
|
| 34 |
|
|
@@ -37,8 +18,20 @@ with open(image_map_name, 'r') as f:
|
|
| 37 |
|
| 38 |
image_list = list(caption_dict.keys())
|
| 39 |
caption_list = list(caption_dict.values())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
-
def search(query, k=
|
| 42 |
# Encode the query
|
| 43 |
query_embedding = model.encode(query)
|
| 44 |
query_vector = np.array([query_embedding])
|
|
|
|
| 5 |
from transformers import AutoTokenizer, AutoModel
|
| 6 |
import faiss
|
| 7 |
import numpy as np
|
|
|
|
| 8 |
from PIL import Image
|
| 9 |
from sentence_transformers import SentenceTransformer
|
| 10 |
import json
|
|
|
|
| 11 |
import zipfile
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
# Map the image ids to the corresponding captions
|
| 14 |
image_map_name = 'captions.json'
|
| 15 |
|
|
|
|
| 18 |
|
| 19 |
image_list = list(caption_dict.keys())
|
| 20 |
caption_list = list(caption_dict.values())
|
| 21 |
+
zip_path = "Images.zip"
|
| 22 |
+
zip_file = zipfile.ZipFile(zip_path)
|
| 23 |
+
|
| 24 |
+
model_name = "sentence-transformers/all-distilroberta-v1"
|
| 25 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 26 |
+
model = SentenceTransformer(model_name)
|
| 27 |
+
vectors = model.encode(caption_list)
|
| 28 |
+
# vectors = np.load("./sbert_text_features.npy")
|
| 29 |
+
vector_dimension = vectors.shape[1]
|
| 30 |
+
index = faiss.IndexFlatL2(vector_dimension)
|
| 31 |
+
faiss.normalize_L2(vectors)
|
| 32 |
+
index.add(vectors)
|
| 33 |
|
| 34 |
+
def search(query, k=4):
|
| 35 |
# Encode the query
|
| 36 |
query_embedding = model.encode(query)
|
| 37 |
query_vector = np.array([query_embedding])
|