Spaces:
Running
on
Zero
Running
on
Zero
File size: 10,498 Bytes
e6f20b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 |
"""A wrapper around all asmk-related packages for convenient use"""
import os.path
import time
import numpy as np
from . import kernel as kern_pkg, codebook as cdb_pkg, index as idx_pkg, inverted_file as ivf_pkg
from . import io_helpers
class ASMKMethod:
    """
    Bundle of the objects required by the asmk method, with one method per step of the
    pipeline. Steps return a new ASMKMethod object rather than modifying self. Prefer the
    initialize_untrained() class method over calling the constructor directly.

    :param dict params: contains keys index, train_codebook, build_ivf and query_ivf, each
        containing the corresponding step parameters
    :param dict metadata: only stored by this object, never changed
    :param Codebook codebook: object from the codebook module
    :param ASMKKernel kernel: object from the kernel module
    :param IVF inverted_file: object from the inverted_file module
    """

    def __init__(self, params, metadata, *, codebook=None, kernel=None, inverted_file=None):
        self.params = params
        self.metadata = metadata
        self.codebook = codebook
        self.kernel = kernel
        self.inverted_file = inverted_file

    @classmethod
    def initialize_untrained(cls, params):
        """Create an untrained asmk method from params only (see constructor docs); the
        metadata starts as an empty dict"""
        return cls(params, {})

    def _with_step(self, step, step_params, step_metadata, *, codebook, kernel=None,
                   inverted_file=None):
        """Return a new object of the same class whose params and metadata additionally
        record the given step; self is left untouched"""
        merged_params = {**self.params, step: step_params}
        merged_metadata = {**self.metadata, step: step_metadata}
        return self.__class__(merged_params, merged_metadata, codebook=codebook,
                              kernel=kernel, inverted_file=inverted_file)

    #
    # Method steps
    #

    def train_codebook(self, *columns, cache_path=None, step_params=None):
        """First step of the method - train the codebook, or load it from cache

        :param columns: positional arrays forwarded to the codebook training (originally
            documented as `vecs`, a 2D numpy array whose rows are the training vectors)
        :param str cache_path: if the file exists, the codebook is loaded from it instead of
            being trained; otherwise the trained codebook is stored there (None to turn off)
        :param dict step_params: parameters used for this step in place of the stored ones
            (self.params['train_codebook'])
        :return: new ASMKMethod object (containing metadata of this step), do not change self
        """
        assert not self.codebook, "Codebook already trained"
        index_factory = idx_pkg.initialize_index(**self.params['index'])
        step_params = step_params or self.params.get("train_codebook")

        if not (cache_path and os.path.exists(cache_path)):
            # Nothing cached yet - train from scratch and optionally store the result
            codebook = cdb_pkg.Codebook(**step_params['codebook'], index_factory=index_factory)
            step_metadata = codebook.train(*columns)
            if cache_path:
                io_helpers.save_pickle(cache_path, codebook.state_dict())
        else:
            started = time.time()
            codebook = cdb_pkg.Codebook.initialize_from_state(
                io_helpers.load_pickle(cache_path), index_factory=index_factory)
            codebook.index()
            # The cached codebook must have been created with the same parameters
            assert codebook.params == step_params['codebook']
            step_metadata = {"load_time": time.time() - started}

        step_metadata["index_class"] = index_factory.__class__.__name__
        return self._with_step("train_codebook", step_params, step_metadata,
                               codebook=codebook)

    def build_ivf(self, *columns, distractors_path=None, cache_path=None, step_params=None):
        """Second step of the method - build the ivf, or load it from cache

        :param columns: positional arrays forwarded to IvfBuilder.add() (originally documented
            as `vecs`, a 2D numpy array of vectors to index, and `imids`, a 1D numpy array of
            the corresponding image ids)
        :param str distractors_path: ivf will be initialized with given distractors ivf path
        :param str cache_path: if the file exists, the ivf is loaded from it and nothing is
            added; otherwise the built ivf is stored there (None to turn off)
        :param dict step_params: parameters used for this step in place of the stored ones
            (self.params['build_ivf'])
        :return: new ASMKMethod object (containing metadata of this step), do not change self
        """
        builder = self.create_ivf_builder(cache_path=cache_path, step_params=step_params)
        if builder.loaded_from_cache:
            # A cached ivf is used as it is
            return self.add_ivf_builder(builder)

        # Fresh ivf: optionally start from distractors, then add everything at once
        if distractors_path:
            builder.initialize_with_distractors(distractors_path)
        builder.add(*columns)
        return self.add_ivf_builder(builder)

    def create_ivf_builder(self, *, cache_path=None, step_params=None):
        """Part of the second step of the method, see build_ivf() method implementation for
        usage

        :param str cache_path: passed on to the IvfBuilder, which loads the ivf from this file
            if it exists (None to turn off)
        :param dict step_params: parameters used for this step in place of the stored ones
            (self.params['build_ivf'])
        :return: IvfBuilder object
        """
        assert not (self.kernel or self.inverted_file), "Inverted file already built"
        step_params = step_params or self.params.get("build_ivf")
        kernel = kern_pkg.ASMKKernel(self.codebook, **step_params['kernel'])
        return IvfBuilder(step_params, self.codebook, kernel, cache_path=cache_path)

    def add_ivf_builder(self, ivf_builder):
        """Part of the second step of the method, see build_ivf() method implementation for
        usage

        :param IvfBuilder ivf_builder: Builder with vectors added; its save() method is
            called here
        :return: new ASMKMethod object (containing metadata from the builder), do not change
            self
        """
        build_metadata = ivf_builder.save()
        return self._with_step("build_ivf", ivf_builder.step_params, build_metadata,
                               codebook=self.codebook, kernel=ivf_builder.kernel,
                               inverted_file=ivf_builder.ivf)

    def query_ivf(self, *columns, step_params=None, progress=None):
        """Last step of the method - query the ivf

        :param columns: positional arrays forwarded to accumulate_scores() (originally
            documented as `qvecs`, a 2D numpy array of query vectors, and `qimids`, a 1D numpy
            array of the corresponding image ids)
        :param dict step_params: parameters used for this step in place of the stored ones
            (self.params['query_ivf'])
        :param progress: step at which update progress printing (None to disable)
        :return: tuple (dict metadata, ndarray images, 2D ndarray ranks, 2D ndarray scores), do
            not change self
        """
        step_params = step_params or self.params.get("query_ivf")
        started = time.time()
        images, ranks, scores = self.accumulate_scores(
            self.codebook, self.kernel, self.inverted_file, *columns,
            params=step_params, progress=progress)
        elapsed = time.time() - started
        # Average over the number of rows in ranks (one row per query image)
        return {"query_avg_time": elapsed / len(ranks)}, images, ranks, scores

    #
    # Helper functions
    #

    @staticmethod
    def accumulate_scores(cdb, kern, ivf, qvecs, qimids, *cols, params, progress=None):
        """Search the inverted file once per query image and stack the results.

        The descriptors (qvecs, plus any extra cols) are grouped by the slices that
        io_helpers.slice_unique() yields for qimids; each group is quantized by the codebook,
        aggregated by the kernel and searched in the ivf, using the "quantize", "aggregate",
        "search" and "similarity" entries of params.

        :return: tuple (ndarray of image ids, stacked ranks, stacked scores)
        """
        def similarity_func(*args):
            return kern.similarity(*args, **params["similarity"])

        per_image = []
        image_slices = list(io_helpers.slice_unique(qimids))
        for imid, seq in io_helpers.progress(image_slices, frequency=progress, header="Query"):
            extra = [col[seq] for col in cols]
            quantized = cdb.quantize(qvecs[seq], *extra, **params["quantize"])
            aggregated = kern.aggregate_image(*quantized, **params["aggregate"])
            ranks, scores = ivf.search(*aggregated, **params["search"],
                                       similarity_func=similarity_func)
            per_image.append((imid, ranks, scores))

        imids_all, ranks_all, scores_all = zip(*per_image)
        return np.array(imids_all), np.vstack(ranks_all), np.vstack(scores_all)
class IvfBuilder:
    """Inverted file (IVF) wrapper simplifying vector addition

    :param dict step_params: contains parameters for build_ivf step
    :param Codebook codebook: object from the codebook module
    :param ASMKKernel kernel: object from the kernel module
    :param str cache_path: if the file exists, the ivf is loaded from it; otherwise an empty
        ivf is created and save() will store it under this path (None to turn off)
    """

    def __init__(self, step_params, codebook, kernel, *, cache_path):
        self.step_params = step_params
        self.codebook = codebook
        self.kernel = kernel

        if cache_path and os.path.exists(cache_path):
            time0 = time.time()
            # NOTE(review): the cache is read through io_helpers.load_pickle - presumably a
            # pickle file, so only point cache_path at trusted files
            self.ivf = ivf_pkg.IVF.initialize_from_state(io_helpers.load_pickle(cache_path))
            self.metadata = {"load_time": time.time() - time0}
            # Loaded from this very file, so save() must not write it again
            self.cache_path = None
        else:
            self.ivf = ivf_pkg.IVF.initialize_empty(**step_params['ivf'],
                                                    codebook_size=codebook.size)
            self.metadata = {"index_time": 0}
            self.cache_path = cache_path

    @property
    def loaded_from_cache(self):
        """If the contained IVF was loaded (otherwise, it is empty after initialization)"""
        return "load_time" in self.metadata

    def initialize_with_distractors(self, path):
        """Replace the contained ivf with the distractors ivf stored at given path and set
        its imid_offset to the number of images it already holds"""
        self.ivf = ivf_pkg.IVF.initialize_from_state(io_helpers.load_pickle(path))
        self.ivf.imid_offset = self.ivf.n_images

    def add(self, *columns, progress=None):
        """Add descriptors and corresponding image ids to the IVF

        The columns are quantized by the codebook, aggregated by the kernel and the result
        is added to the ivf; the elapsed time is accumulated in metadata['index_time'].
        This also works on a builder that was loaded from cache, where the counter starts
        from zero.

        :param columns: positional arrays forwarded to the codebook quantization (originally
            documented as `vecs`, a 2D array of local descriptors, and `imids`, a 1D array of
            image ids)
        :param int progress: step at which update progress printing (None to disable); the
            ivf addition reports at 200 times this step
        """
        time0 = time.time()
        quantized = self.codebook.quantize(*columns, **self.step_params["quantize"])
        if progress:
            print(">> Descriptors quantized")
        aggregated = self.kernel.aggregate(*quantized, **self.step_params["aggregate"],
                                           progress=progress)
        self.ivf.add(*aggregated, progress=200*progress if progress else None)

        # A builder restored from cache only has "load_time" in its metadata, so a plain
        # `+=` would raise KeyError after the ivf has already been modified
        self.metadata['index_time'] = self.metadata.get('index_time', 0) + (time.time() - time0)

    def save(self):
        """Save to cache path if defined

        :return: dict metadata with ivf stats
        """
        if self.cache_path:
            io_helpers.save_pickle(self.cache_path, self.ivf.state_dict())

        return {**self.metadata, "ivf_stats": self.ivf.stats}
|