Upload with huggingface_hub
Browse files- .gitattributes +2 -0
- build_cc3m_index.py +34 -0
- cc3m_imagebind_files.json +3 -0
- faiss_cc3m_search.py +12 -0
- infos.json +1 -0
- knn.index +3 -0
.gitattributes
CHANGED
|
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
cc3m_imagebind_files.json filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
knn.index filter=lfs diff=lfs merge=lfs -text
|
build_cc3m_index.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import glob
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
import tqdm
|
| 5 |
+
import numpy as np
|
| 6 |
+
from multiprocessing import Pool
|
| 7 |
+
from autofaiss import build_index
|
| 8 |
+
|
| 9 |
+
def load_file(file):
    """Load a single ``.npy`` file and return whatever ``np.load`` yields for it.

    Kept as a module-level function so it can be handed to
    ``multiprocessing.Pool.map``.
    """
    loaded = np.load(file)
    return loaded
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Directory whose immediate sub-directories are scanned for ``.npy`` feature files.
in_dir = '/data0/ImageBindFeatures/cc3m/'
# Destination of the single stacked array that holds every loaded feature.
save_path = "cc3m_imagebind.npy"
# Number of worker processes used to read the feature files in parallel.
num_workers = 256


def main():
    """Stack all feature files into one array, record their order, build the index.

    Steps:
      1. Collect every file matching ``in_dir + '*/*.npy'``.
      2. Load them in parallel with ``load_file`` and stack them along a new
         leading axis.
      3. Save the stacked array to ``save_path`` and write the matching list of
         file paths (relative to ``in_dir``) to ``cc3m_imagebind_files.json``.
      4. Call autofaiss ``build_index`` to write ``knn.index`` and ``infos.json``.

    Raises:
        FileNotFoundError: if no feature file is found under ``in_dir``.
    """
    # glob returns paths in arbitrary order; sorting keeps the row order of the
    # stacked array (and of the JSON file list) reproducible between runs.
    files = sorted(glob.glob(in_dir + '*/*.npy'))
    if not files:
        # np.stack would otherwise fail with an unrelated-looking ValueError.
        raise FileNotFoundError(
            "no .npy files found matching {}*/*.npy".format(in_dir))

    # The context manager shuts the worker processes down once loading is
    # finished instead of leaving them alive during the index build.
    with Pool(num_workers) as pool:
        results = pool.map(load_file, files)
    results = np.stack(results, axis=0)

    np.save(save_path, results)

    # Entry i of this list names the file that produced row i of the array.
    with open('cc3m_imagebind_files.json', 'w') as f:
        json.dump([os.path.relpath(x, in_dir) for x in files], f)

    # build index
    # NOTE(review): embeddings="./" points autofaiss at the current directory;
    # presumably every .npy file found there is indexed, so run this from a
    # directory that contains only `save_path` -- confirm against autofaiss docs.
    build_index(
        embeddings="./",
        index_path="knn.index",
        index_infos_path="infos.json",
        max_index_memory_usage="32G",
        current_memory_available="100G",
        metric_type='ip',
    )


# Guard the entry point so worker processes started with the "spawn" method
# can import this module without re-running the whole pipeline.
if __name__ == "__main__":
    main()
|
cc3m_imagebind_files.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed9ea276a776b47c47868fc13bab2c1813f167c3155b6aab58d37dc2939e8df8
|
| 3 |
+
size 129571307
|
faiss_cc3m_search.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import faiss
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
# Load the prebuilt index from the current working directory.
my_index = faiss.read_index("knn.index")

# Random stand-in queries, one per row. The width is read from the index
# (`my_index.d`) so it always matches the indexed vectors. faiss documents
# `search` as taking a float32 NumPy array, so the tensor is converted
# explicitly rather than relying on implicit tensor handling, which older
# faiss builds do not provide.
query = torch.rand(3, my_index.d).numpy()

# Number of nearest neighbours to return for each query row.
k = 5
# Both outputs have one row per query and k columns; `indices` holds the
# positions of the matched vectors inside the index.
distances, indices = my_index.search(query, k)

# Print the results instead of dropping into a debugger, so the script also
# completes when run non-interactively.
print(distances)
print(indices)
|
infos.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"index_key": "HNSW32", "index_param": "efSearch=16", "index_path": "/data1/llama_adapter_project/extract_ImageBind_features/autofaiss_cc3m/autofaiss/knn.index", "size in bytes": 13264998158, "avg_search_speed_ms": 36.55575554355653, "99p_search_speed_ms": 46.38748399913309, "reconstruction error %": 0.0, "nb vectors": 3036761, "vectors dimension": 1024, "compression ratio": 0.9376988151708419}
|
knn.index
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c39db2288566cebf9aca85306567ce54dd5d2459df8f568717be726af376560b
|
| 3 |
+
size 13264998158
|