Spaces:

TheProjectsGuy
/

AnyLoc

Runtime error

App Files Files Community

TheProjectsGuy committed on Aug 14, 2023

Commit

61364af

1 Parent(s): 9543bb3

Uploaded normal app layout (without extractor)

Browse files

Files changed (4) hide show

app.py +288 -0
packages.txt +2 -0
requirements.txt +11 -0
utilities.py +478 -0

app.py ADDED Viewed

	@@ -0,0 +1,288 @@

+# Show VLAD clustering for set of example images or a user image
+"""
+    User input:
+    - Domain: Indoor, Aerial, or Urban
+    - Image: Image to be clustered
+    - Cluster numbers (to visualize)
+    - Pixel coordinates (to pick further clusters)
+    - A unique cache ID (to store the DINO forward passes)
+    There are example images for each domain.
+    Output:
+    - All images with cluster assignments
+    Some Gradio links:
+    - Controlling layout
+        - https://www.gradio.app/guides/quickstart#blocks-more-flexibility-and-control
+    - Data state (persistence)
+        - https://www.gradio.app/guides/interface-state
+        - https://www.gradio.app/docs/state
+    - Layout control
+        - https://www.gradio.app/guides/controlling-layout
+        - https://www.gradio.app/guides/blocks-and-event-listeners
+"""
+# %%
+import os
+import gradio as gr
+import numpy as np
+import cv2 as cv
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torchvision import transforms as tvf
+from torchvision.transforms import functional as T
+from PIL import Image
+import matplotlib.pyplot as plt
+import distinctipy as dipy
+from typing import Literal, List
+import gradio as gr
+import time
+import glob
+import shutil
+from copy import deepcopy
+# DINOv2 imports
+from utilities import DinoV2ExtractFeatures
+from utilities import VLAD
# Use the GPU when PyTorch can see one, otherwise run on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# %%
# Configurations
# Type aliases for the descriptor facet, the vocabulary domain and the
# DINOv2 backbone name
T1 = Literal["query", "key", "value", "token"]
T2 = Literal["aerial", "indoor", "urban"]
DOMAINS = ["aerial", "indoor", "urban"]
T3 = Literal[
    "dinov2_vits14", "dinov2_vitb14", "dinov2_vitl14", "dinov2_vitg14",
]
def _ex(x):
    """Expand `~` in `x` and resolve it to a real (symlink-free) path."""
    return os.path.realpath(os.path.expanduser(x))
dino_model: T3 = "dinov2_vitg14"    # Backbone used for descriptors
desc_layer: int = 31                # Layer the descriptors come from
desc_facet: T1 = "value"            # Facet the descriptors come from
num_c: int = 8                      # Number of VLAD clusters
cache_dir: str = _ex("./cache")     # Directory containing program cache
max_img_size: int = 1024            # Image resolution (max dim/size)
max_num_imgs: int = 10              # Max number of images to upload
share: bool = False                 # Share application using .gradio link
# Verify inputs
assert os.path.isdir(cache_dir), "Cache directory not found"
# %%
# Model and transforms
print("Loading DINO model")
# The extractor is intentionally left out of this layout-only version:
# extractor = DinoV2ExtractFeatures(dino_model, desc_layer, desc_facet,
#                                     device=device)
extractor = None
print("DINO model loaded")
# VLAD path (directory): <cache>/vocabulary/<model>/l<layer>_<facet>_c<n>
ext_s = f"{dino_model}/l{desc_layer}_{desc_facet}_c{num_c}"
vc_dir = os.path.join(cache_dir, "vocabulary", ext_s)
# Base image transformations: to tensor, then per-channel normalization
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]
base_tf = tvf.Compose([
    tvf.ToTensor(),
    tvf.Normalize(mean=_norm_mean, std=_norm_std),
])
+# %%
# Get VLAD object
def get_vlad_clusters(domain, pr = gr.Progress()):
    """
        Restore the VLAD vocabulary (cluster centers) for a domain.

        Parameters:
        - domain:   Domain name. It is lower-cased and must then be one
                    of `DOMAINS`.
        - pr:       Gradio progress tracker

        Returns:
        - A status message (str)
        - The VLAD object with the cluster centers restored from the
            vocabulary directory of the domain. None if the domain is
            invalid or its cluster centers file is missing.
    """
    dm: T2 = str(domain).lower()
    # Report problems through the status message (like the missing
    # file case below) instead of an `assert`, which is stripped when
    # Python runs with -O
    if dm not in DOMAINS:
        return f"Invalid domain: {domain}", None
    # Load VLAD cluster centers
    pr(0, desc="Loading VLAD clusters")
    c_centers_file = os.path.join(vc_dir, dm, "c_centers.pt")
    if not os.path.isfile(c_centers_file):
        return f"Cluster centers not found for: {domain}", None
    # Only the shape is read here, so map the tensor to the CPU: the
    # file then loads even if it was saved from another device
    c_centers = torch.load(c_centers_file, map_location="cpu")
    pr(0.5)
    n_clusters, desc_dim = c_centers.shape[0], c_centers.shape[1]
    vlad = VLAD(n_clusters, desc_dim,
            cache_dir=os.path.dirname(c_centers_file))
    vlad.fit(None)  # Restore the cache
    pr(1)
    return f"VLAD clusters loaded for: {domain}", vlad
+# %%
# Get VLAD descriptors
@torch.no_grad()
def get_descs(imgs_batch, pr = gr.Progress()):
    """
        Extract patch descriptors for every image in a batch.

        Each image is normalized with `base_tf`, shrunk (keeping the
        aspect ratio) if its larger side exceeds `max_img_size`, and
        center-cropped so that both sides are multiples of 14 before
        it is passed to the extractor.

        Parameters:
        - imgs_batch:   List of images as numpy arrays. None entries
                        (empty upload slots) are skipped. None or an
                        empty list yields no descriptors.
        - pr:           Gradio progress tracker

        Returns:
        - patch_descs:  List of dict, one per processed image, with
                        "img" (PIL image, resized if needed) and
                        "descs" (extractor output on CPU)
        - A status message (str)

        Raises:
        - gr.Error:     If the module-level `extractor` is not loaded
    """
    if extractor is None:
        # Without this the call below fails with an opaque
        # "'NoneType' object is not callable"
        raise gr.Error("DINO extractor is not loaded")
    imgs_batch: List[np.ndarray] = list(imgs_batch or [])
    pr(0, desc="Extracting descriptors")
    patch_descs = []
    for i, img in enumerate(imgs_batch):
        if img is None:     # Slot shown in the UI but nothing uploaded
            print(f"Image {i+1}: not uploaded, skipping")
            pr((i+1) / len(imgs_batch))
            continue
        # Convert to PIL image
        pil_img = Image.fromarray(img)
        img_pt = base_tf(pil_img).to(device)
        c, h, w = img_pt.shape
        if max(h, w) > max_img_size:
            print(f"Image {i+1}: {img_pt.shape[-2:]}, outside")
            # Maintain aspect ratio (larger side -> max_img_size)
            if h >= w:
                w = int(w * max_img_size / h)
                h = max_img_size
            else:
                h = int(h * max_img_size / w)
                w = max_img_size
            img_pt = T.resize(img_pt, (h, w),
                interpolation=T.InterpolationMode.BICUBIC)
            pil_img = pil_img.resize((w, h))    # Backup
        # Make image patchable (sides as multiples of the patch size)
        h_new, w_new = (h // 14) * 14, (w // 14) * 14
        img_pt = tvf.CenterCrop((h_new, w_new))(img_pt)[None, ...]
        # Extract descriptors
        ret = extractor(img_pt).cpu()  # [1, n_p, d]
        patch_descs.append({"img": pil_img, "descs": ret})
        pr((i+1) / len(imgs_batch))
    return patch_descs, \
            f"Descriptors extracted for {len(patch_descs)} images"
+# %%
# Assign VLAD clusters (descriptor assignment)
def assign_vlad(patch_descs, vlad, pr = gr.Progress()):
    """
        Assign every patch descriptor to a VLAD cluster and upsample
        the per-patch labels to pixel resolution.

        Parameters:
        - patch_descs:  List of dict with "img" (PIL image) and
                        "descs" (descriptors of shape [1, n_p, d]),
                        as returned by `get_descs`
        - vlad:         VLAD object with cluster centers loaded
        - pr:           Gradio progress tracker

        Returns:
        - desc_assignments: List of integer tensors (one per image),
                            each of shape [h_p * 14, w_p * 14] where
                            h_p, w_p are the image sides divided (floor)
                            by 14. None if the inputs are missing.
        - A status message (str)
    """
    # The earlier pipeline steps may have failed and left these states
    # empty; report that instead of crashing
    if vlad is None:
        return None, "VLAD clusters are not loaded"
    if not patch_descs:
        return None, "No descriptors to assign"
    vlad: VLAD = vlad
    pr(0, desc="Assigning VLAD clusters")
    desc_assignments = []   # List[Tensor;shape=('h', 'w');int]
    for i, item in enumerate(patch_descs):
        # Residual vectors; 'n' could differ (based on img sizes)
        res = vlad.generate_res_vec(item["descs"][0])   # ['n', n_c, d]
        w, h = item["img"].size     # PIL size is (width, height)
        h_p, w_p = h // 14, w // 14
        h_new, w_new = h_p * 14, w_p * 14
        assert h_p * w_p == res.shape[0], "Residual incorrect!"
        # Descriptor assignments: cluster with least L1 residual
        da = res.abs().sum(dim=2).argmin(dim=1).reshape(h_p, w_p)
        # Interpolation needs a float input; cast back afterwards
        da = F.interpolate(da[None, None, ...].to(float),
                (h_new, w_new), mode="nearest")[0, 0].to(da.dtype)
        desc_assignments.append(da)
        pr((i+1) / len(patch_descs))
    pr(1.0)
    return desc_assignments, "VLAD clusters assigned"
+# %%
# Cluster assignments to images
def get_ca_images(desc_assignments, patch_descs, alpha,
            pr = gr.Progress()):
    """
        Blend the cluster assignment (as colors) over each image.

        Parameters:
        - desc_assignments: List of integer label maps (one per image)
                            as returned by `assign_vlad`
        - patch_descs:      List of dict with "img" (PIL image), as
                            returned by `get_descs`
        - alpha:            Blend weight of the cluster colors. The
                            image gets weight (1 - alpha).
        - pr:               Gradio progress tracker

        Returns:
        - res_imgs:     List of PIL images with clusters blended in.
                        None if there is nothing to draw.
        - A status message (str)
    """
    if desc_assignments is None or len(desc_assignments) == 0 \
            or not patch_descs:
        return None, "First load images"
    c_colors = dipy.get_colors(num_c, rng=928,
            colorblind_type="Deuteranomaly")
    np_colors = (np.array(c_colors) * 255).astype(np.uint8)
    res_imgs = []   # List[PIL.Image]
    pr(0, desc="Generating cluster assignment images")
    n_imgs = min(len(patch_descs), len(desc_assignments))
    for i in range(n_imgs):
        # Descriptor assignment image: [h, w, 3]
        da = np.asarray(desc_assignments[i])    # ['h', 'w']
        h_new, w_new = da.shape
        da_img = np.zeros((h_new, w_new, 3), dtype=np.uint8)
        for k in range(num_c):
            da_img[da == k] = np_colors[k]
        # Background image: [h, w, 3]. The descriptors were extracted
        # from a center crop of the image, so crop here as well (a
        # rescale would shift the overlay w.r.t. the image content).
        # Offsets are rounded in the same way as torchvision does.
        img_np = np.array(patch_descs[i]["img"], dtype=np.uint8)
        h, w = img_np.shape[:2]
        top = int(round((h - h_new) / 2.0))
        left = int(round((w - w_new) / 2.0))
        img_np = np.ascontiguousarray(
                img_np[top:top+h_new, left:left+w_new])
        res_img = cv.addWeighted(img_np, 1 - alpha, da_img, alpha, 0.)
        res_imgs.append(Image.fromarray(res_img))
        pr((i+1) / n_imgs)
    pr(1.0)
    return res_imgs, "Cluster assignment images generated"
+# %%
print("Interface build started")
# Build the interface
with gr.Blocks() as demo:
    # ---- Helper functions ----
    def var_num_img(s):
        """Show the first `s` image inputs and hide the remaining."""
        n_show = int(s)  # Slider value as int
        shown = [gr.Image.update(label=f"Image {i+1}", visible=True)
                for i in range(n_show)]
        hidden = [gr.Image.update(visible=False)
                for _ in range(max_num_imgs - n_show)]
        return shown + hidden
    # ---- State declarations ----
    vlad = gr.State()               # VLAD object
    desc_assignments = gr.State()   # Cluster assignments
    imgs_batch = gr.State()         # Images as batch
    patch_descs = gr.State()        # Patch descriptors
    # ---- All UI elements ----
    d_vals = [k.title() for k in DOMAINS]
    domain = gr.Radio(d_vals, value=d_vals[0])
    nimg_s = gr.Slider(1, max_num_imgs, value=1, step=1,
            label="How many images?")
    with gr.Row():  # Dynamic row (images in columns)
        # The first `nimg_s.value` inputs are visible and labelled,
        # the rest are created hidden (revealed through the slider)
        imgs = [
            gr.Image(label=f"Image {i+1}", visible=True)
            if i < nimg_s.value else gr.Image(visible=False)
            for i in range(max_num_imgs)
        ]
        for img in imgs:  # Set image as "input"
            img.change(lambda _: None, img)
    with gr.Row():  # Dynamic row of output (cluster) images
        imgs2 = [gr.Image(label=f"VLAD Clusters {i+1}",
                visible=False) for i in range(max_num_imgs)]
    nimg_s.change(var_num_img, nimg_s, imgs)
    blend_alpha = gr.Slider(0, 1, 0.4, step=0.01,
        label="Blend alpha (weight for cluster centers)")
    bttn1 = gr.Button("Click Me!")  # Cluster assignment
    # One status line per pipeline stage
    out_msg1 = gr.Markdown("Select domain and upload images")
    out_msg2 = gr.Markdown("For descriptor extraction")
    out_msg3 = gr.Markdown("Followed by VLAD assignment")
    out_msg4 = gr.Markdown("Followed by cluster images")
    # ---- Utility functions ----
    def batch_images(data):
        """Collect the first `nimg_s` input images into a list.

        `data` maps components to their values (the inputs of the
        event are passed as a set).
        """
        n_used = data[nimg_s]
        images: List[np.ndarray] = [data[comp] for comp in imgs[:n_used]]
        return images
    def unbatch_images(imgs_batch):
        """Spread a list of images over the output slots (pad to max)."""
        updates = [gr.Image.update(visible=False)
                for _ in range(max_num_imgs)]
        if imgs_batch is None or len(imgs_batch) == 0:
            return updates
        for slot, img_pil in enumerate(imgs_batch):
            updates[slot] = gr.Image.update(np.array(img_pil),
                    visible=True)
        return updates
    # ---- Main pipeline ----
    # Get the VLAD cluster assignment images on click: load clusters,
    # batch images, extract descriptors, assign clusters, draw, show
    ca_inputs = [desc_assignments, patch_descs, blend_alpha]
    ca_outputs = [imgs_batch, out_msg4]
    bttn1.click(get_vlad_clusters, domain, [out_msg1, vlad])\
        .then(batch_images, {nimg_s, *imgs, imgs_batch}, imgs_batch)\
        .then(get_descs, imgs_batch, [patch_descs, out_msg2])\
        .then(assign_vlad, [patch_descs, vlad],
                [desc_assignments, out_msg3])\
        .then(get_ca_images, ca_inputs, ca_outputs)\
        .then(unbatch_images, imgs_batch, imgs2)
    # If the blending changes now, update the cluster images
    blend_alpha.change(get_ca_images, ca_inputs, ca_outputs)\
        .then(unbatch_images, imgs_batch, imgs2)
print("Interface build completed")
# %%
# Deploy application
demo.queue().launch(share=share)
print("Application deployment ended, exiting...")

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ python3-opencv
2	+

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+numpy
+opencv-python
+torch
+torchvision
+torchaudio
+pillow
+matplotlib
+distinctipy
+einops
+fast_pytorch_kmeans

utilities.py ADDED Viewed

	@@ -0,0 +1,478 @@

+# A portable utility module for the demo programs
+# %%
+import os
+import numpy as np
+import einops as ein
+import torch
+from torch import nn
+from torch.nn import functional as F
+import fast_pytorch_kmeans as fpk
+from typing import Literal, Union, List
+# %%
# Extract features from a Dino-v2 model
_DINO_V2_MODELS = Literal["dinov2_vits14", "dinov2_vitb14", \
                        "dinov2_vitl14", "dinov2_vitg14"]
_DINO_FACETS = Literal["query", "key", "value", "token"]
class DinoV2ExtractFeatures:
    """
        Extract features from an intermediate layer in Dino-v2

        A forward hook on the chosen block (facet "token") or on its
        `attn.qkv` module (other facets) stores the module output
        during the forward pass; `__call__` post-processes it.
    """
    def __init__(self, dino_model: _DINO_V2_MODELS, layer: int,
                facet: _DINO_FACETS="token", use_cls=False,
                norm_descs=True, device: str = "cpu") -> None:
        """
            Parameters:
            - dino_model:   The DINO-v2 model to use
            - layer:        The layer to extract features from
            - facet:    "query", "key", or "value" for the attention
                        facets. "token" for the output of the layer.
            - use_cls:  If True, the CLS token (first item) is also
                        included in the returned list of descriptors.
                        Otherwise, only patch descriptors are used.
            - norm_descs:   If True, the descriptors are normalized
            - device:   PyTorch device to use

            Raises:
            - ValueError:   If `facet` is not a supported facet
        """
        # Set first, so that `__del__` is safe even if the rest of the
        # constructor fails
        self.fh_handle = None
        self._hook_out = None   # Hook data
        # Fail before the (expensive) model load. An unknown facet
        # would otherwise silently yield the concatenated qkv output.
        if facet not in ("query", "key", "value", "token"):
            raise ValueError(f"Invalid facet: {facet}")
        self.vit_type: str = dino_model
        self.dino_model: nn.Module = torch.hub.load(
                'facebookresearch/dinov2', dino_model)
        self.device = torch.device(device)
        self.dino_model = self.dino_model.eval().to(self.device)
        self.layer: int = layer
        self.facet = facet
        self.use_cls = use_cls
        self.norm_descs = norm_descs
        block = self.dino_model.blocks[self.layer]
        hooked = block if self.facet == "token" else block.attn.qkv
        self.fh_handle = hooked.register_forward_hook(
                self._generate_forward_hook())
    def _generate_forward_hook(self):
        """Return a forward hook that stores the module output."""
        def _forward_hook(module, inputs, output):
            self._hook_out = output
        return _forward_hook
    def __call__(self, img: torch.Tensor) -> torch.Tensor:
        """
            Parameters:
            - img:   The input image

            Returns:
            - The hooked output, without its first item along dim 1
                (CLS token) unless `use_cls` is set. For the "query",
                "key" and "value" facets only the matching third of
                the last dimension is kept. Normalized along the last
                dimension if `norm_descs` is set.
        """
        with torch.no_grad():
            # Only the hooked output is of interest (not the result)
            self.dino_model(img)
            hook_out = self._hook_out
            self._hook_out = None   # Reset the hook
            if hook_out is None:
                raise RuntimeError("Forward hook did not capture data")
            res = hook_out if self.use_cls else hook_out[:, 1:, ...]
            if self.facet in ["query", "key", "value"]:
                # The qkv output stacks query, key and value along the
                # last dimension (equal thirds)
                d_len = res.shape[2] // 3
                if self.facet == "query":
                    res = res[:, :, :d_len]
                elif self.facet == "key":
                    res = res[:, :, d_len:2*d_len]
                else:
                    res = res[:, :, 2*d_len:]
        if self.norm_descs:
            res = F.normalize(res, dim=-1)
        return res
    def __del__(self):
        # The handle is missing (or None) if construction failed
        handle = getattr(self, "fh_handle", None)
        if handle is not None:
            handle.remove()
+# %%
# VLAD global descriptor implementation
class VLAD:
    """
        An implementation of VLAD algorithm given database and query
        descriptors.
        Constructor arguments:
        - num_clusters:     Number of cluster centers for VLAD
        - desc_dim:         Descriptor dimension. If None, then it is
                            inferred when running `fit` method.
        - intra_norm:       If True, intra normalization is applied
                            when constructing VLAD
        - norm_descs:       If True, the given descriptors are
                            normalized before training and predicting
                            VLAD descriptors. Different from the
                            `intra_norm` argument.
        - dist_mode:        Distance mode for KMeans clustering for
                            vocabulary (not residuals). Must be in
                            {'euclidean', 'cosine'}.
        - vlad_mode:        Mode for descriptor assignment (to cluster
                            centers) in VLAD generation. Must be in
                            {'soft', 'hard'}
        - soft_temp:        Temperature for softmax (if 'vlad_mode' is
                            'soft') for assignment
        - cache_dir:        Directory to cache the VLAD vectors. If
                            None, then no caching is done. If a str,
                            then it is assumed as the folder path. Use
                            absolute paths.
        Notes:
        - Arandjelovic, Relja, and Andrew Zisserman. "All about VLAD."
            Proceedings of the IEEE conference on Computer Vision and
            Pattern Recognition. 2013.
    """
    def __init__(self, num_clusters: int,
                desc_dim: Union[int, None]=None,
                intra_norm: bool=True, norm_descs: bool=True,
                dist_mode: str="cosine", vlad_mode: str="hard",
                soft_temp: float=1.0,
                cache_dir: Union[str,None]=None) -> None:
        self.num_clusters = num_clusters
        self.desc_dim = desc_dim
        self.intra_norm = intra_norm
        self.norm_descs = norm_descs
        self.mode = dist_mode
        self.vlad_mode = str(vlad_mode).lower()
        assert self.vlad_mode in ['soft', 'hard']
        self.soft_temp = soft_temp
        # Set in the training phase
        self.c_centers = None
        self.kmeans = None
        # Set the caching
        self.cache_dir = cache_dir
        if self.cache_dir is not None:
            self.cache_dir = os.path.abspath(os.path.expanduser(
                    self.cache_dir))
            if not os.path.exists(self.cache_dir):
                os.makedirs(self.cache_dir)
                print(f"Created cache directory: {self.cache_dir}")
            else:
                print("Warning: Cache directory already exists: " \
                        f"{self.cache_dir}")
        else:
            print("VLAD caching is disabled.")
    def can_use_cache_vlad(self):
        """
            Checks if the cache directory is a valid cache directory.
            For it to be valid, it must exist and should at least
            include the cluster centers file.
            Returns:
            - True if the cache directory is valid
            - False if
                - the cache directory doesn't exist
                - exists but doesn't contain the cluster centers
                - no caching is set in constructor
        """
        if self.cache_dir is None:
            return False
        if not os.path.exists(self.cache_dir):
            return False
        return os.path.exists(f"{self.cache_dir}/c_centers.pt")
    def can_use_cache_ids(self,
                cache_ids: Union[List[str], str, None],
                only_residuals: bool=False) -> bool:
        """
            Checks if the given cache IDs exist in the cache directory
            and returns True if all of them exist.
            The cache is stored in the following files:
            - c_centers.pt:     Cluster centers
            - `cache_id`_r.pt:  Residuals for VLAD
            - `cache_id`_l.pt:  Labels for VLAD (hard assignment)
            - `cache_id`_s.pt:  Soft assignment for VLAD
            The function returns False if cache cannot be used or if
            any of the cache IDs are not found. If all cache IDs are
            found, then True is returned.
            This function is mainly for use outside the VLAD class.
            Parameters:
            - cache_ids:        A cache ID or a list of them
            - only_residuals:   If True, only the residual files are
                                checked (not labels/soft assignment)
        """
        if not self.can_use_cache_vlad():
            return False
        if cache_ids is None:
            return False
        if isinstance(cache_ids, str):
            cache_ids = [cache_ids]
        # Suffix of the assignment file for the current mode
        a_sfx = "l" if self.vlad_mode == "hard" else "s"
        for cache_id in cache_ids:
            if not os.path.exists(
                    f"{self.cache_dir}/{cache_id}_r.pt"):
                return False
            if not only_residuals and not os.path.exists(
                    f"{self.cache_dir}/{cache_id}_{a_sfx}.pt"):
                return False
        return True
    def _prepare_descs(self,
                descs: Union[np.ndarray, torch.Tensor]) \
                -> torch.Tensor:
        """
            Convert descriptors to a tensor (float32 if given as a
            numpy array) and normalize them if `norm_descs` is set.
        """
        if isinstance(descs, np.ndarray):
            descs = torch.from_numpy(descs).to(torch.float32)
        if self.norm_descs:
            descs = F.normalize(descs)
        return descs
    @staticmethod
    def _stack_or_list(items: list) -> Union[torch.Tensor, list]:
        """
            Stack the items if they can be stacked (torch first, then
            numpy). Otherwise (e.g. items of different shapes), return
            the list as it is.
        """
        try:    # Most likely pytorch
            return torch.stack(items)
        except (TypeError, RuntimeError):
            # RuntimeError: tensors of different shapes (or no items)
            pass
        try:    # Otherwise numpy
            return np.stack(items)
        except (TypeError, ValueError):
            return items    # Let it remain as a list
    # Generate cluster centers
    def fit(self, train_descs: Union[np.ndarray, torch.Tensor, None]):
        """
            Using the training descriptors, generate the cluster
            centers (vocabulary). Function expects all descriptors in
            a single list (see `fit_and_generate` for a batch of
            images).
            If the cache directory is valid, then retrieves cluster
            centers from there (the `train_descs` are ignored).
            Otherwise, stores the cluster centers in the cache
            directory (if using caching).
            Parameters:
            - train_descs:  Training descriptors of shape
                            [num_train_desc, desc_dim]. If None, then
                            caching should be valid (else ValueError).
        """
        # Clustering to create vocabulary
        self.kmeans = fpk.KMeans(self.num_clusters, mode=self.mode)
        # Check if cache exists
        if self.can_use_cache_vlad():
            print("Using cached cluster centers")
            self.c_centers = torch.load(
                    f"{self.cache_dir}/c_centers.pt")
            self.kmeans.centroids = self.c_centers
            if self.desc_dim is None:
                self.desc_dim = self.c_centers.shape[1]
                print(f"Desc dim set to {self.desc_dim}")
        else:
            if train_descs is None:
                raise ValueError("No training descriptors given")
            if isinstance(train_descs, np.ndarray):
                train_descs = torch.from_numpy(train_descs).\
                    to(torch.float32)
            if self.desc_dim is None:
                self.desc_dim = train_descs.shape[1]
            if self.norm_descs:
                train_descs = F.normalize(train_descs)
            self.kmeans.fit(train_descs)
            self.c_centers = self.kmeans.centroids
            if self.cache_dir is not None:
                print("Caching cluster centers")
                torch.save(self.c_centers,
                        f"{self.cache_dir}/c_centers.pt")
    def fit_and_generate(self,
                train_descs: Union[np.ndarray, torch.Tensor]) \
                -> torch.Tensor:
        """
            Given a batch of descriptors over images, `fit` the VLAD
            and generate the global descriptors for the training
            images. Use only when there are a fixed number of
            descriptors in each image.
            Parameters:
            - train_descs:  Training image descriptors of shape
                            [num_imgs, num_descs, desc_dim]. There are
                            'num_imgs' images, each image has
                            'num_descs' descriptors and each
                            descriptor is 'desc_dim' dimensional.
            Returns:
            - train_vlads:  The VLAD vectors of all training images.
                            Shape: [num_imgs, num_clusters*desc_dim]
        """
        # Generate vocabulary
        all_descs = ein.rearrange(train_descs, "n k d -> (n k) d")
        self.fit(all_descs)
        # For each image, stack VLAD
        return torch.stack([self.generate(tr) for tr in train_descs])
    def generate(self, query_descs: Union[np.ndarray, torch.Tensor],
                cache_id: Union[str, None]=None) -> torch.Tensor:
        """
            Given the query descriptors, generate a VLAD vector. Call
            `fit` before using this method. Use this for only single
            images and with descriptors stacked. Use function
            `generate_multi` for multiple images.
            Parameters:
            - query_descs:  Query descriptors of shape [n_q, desc_dim]
                            where 'n_q' is number of 'desc_dim'
                            dimensional descriptors in a query image.
            - cache_id:     If not None, then the VLAD vector is
                            constructed using the residual and labels
                            from this file.
            Returns:
            - n_vlas:   Normalized VLAD: [num_clusters*desc_dim]
        """
        residuals = self.generate_res_vec(query_descs, cache_id)
        d = self.desc_dim
        if d is None:   # Not given and `fit` did not infer it
            d = residuals.shape[2]
        use_cache = cache_id is not None and self.can_use_cache_vlad()
        # Un-normalized VLAD vector: [c*d,]
        un_vlad = torch.zeros(self.num_clusters * d)
        if self.vlad_mode == 'hard':
            # Get labels for assignment of descriptors
            l_file = f"{self.cache_dir}/{cache_id}_l.pt"
            if use_cache and os.path.isfile(l_file):
                labels = torch.load(l_file)
            else:
                # Same preparation as for the residuals (and as in
                # `fit`), so that labels and residuals agree
                labels = self.kmeans.predict(
                        self._prepare_descs(query_descs))   # [q]
                if use_cache:
                    torch.save(labels, l_file)
            # Create VLAD from residuals and labels
            for k in torch.unique(labels).tolist():
                # Sum of residuals for the descriptors in the cluster
                #  Shape:[q, c, d]  ->  [q', d] -> [d]
                cd_sum = residuals[labels==k,k].sum(dim=0)
                if self.intra_norm:
                    cd_sum = F.normalize(cd_sum, dim=0)
                un_vlad[k*d:(k+1)*d] = cd_sum
        else:       # Soft cluster assignment
            s_file = f"{self.cache_dir}/{cache_id}_s.pt"
            if use_cache and os.path.isfile(s_file):
                soft_assign = torch.load(s_file)
            else:
                descs = self._prepare_descs(query_descs)
                # Cosine similarity: 1 = close, -1 = away
                cos_sims = F.cosine_similarity( # [q, c]
                        descs[:, None, :],
                        self.c_centers[None, :, :], dim=2)
                soft_assign = F.softmax(self.soft_temp*cos_sims,
                        dim=1)
                if use_cache:
                    torch.save(soft_assign, s_file)
            # Soft assignment scores (as probabilities): [q, c]
            for k in range(0, self.num_clusters):
                # Weighted sum, over all descriptors, of the residuals
                # to cluster k only: [q, 1] * [q, d] -> [d]
                cd_sum = (soft_assign[:, k, None] \
                        * residuals[:, k]).sum(dim=0)
                if self.intra_norm:
                    cd_sum = F.normalize(cd_sum, dim=0)
                un_vlad[k*d:(k+1)*d] = cd_sum
        # Normalize the VLAD vector
        n_vlad = F.normalize(un_vlad, dim=0)
        return n_vlad
    def generate_multi(self,
            multi_query: Union[np.ndarray, torch.Tensor, list],
            cache_ids: Union[List[str], None]=None) \
            -> Union[torch.Tensor, list]:
        """
            Given query descriptors from multiple images, generate
            the VLAD for them.
            Parameters:
            - multi_query:  Descriptors of shape [n_imgs, n_kpts, d]
                            There are 'n_imgs' and each image has
                            'n_kpts' keypoints, with 'd' dimensional
                            descriptor each. If a List (can then have
                            different number of keypoints in each
                            image), then the result is also a list.
            - cache_ids:    Cache IDs for the VLAD vectors. If None,
                            then no caching is done (stored or
                            retrieved). If a list, then the length
                            should be 'n_imgs' (one per image).
            Returns:
            - multi_res:    VLAD descriptors for the queries. Stacked
                            if possible, a list otherwise.
        """
        if cache_ids is None:
            cache_ids = [None] * len(multi_query)
        res = [self.generate(q, c) \
                for (q, c) in zip(multi_query, cache_ids)]
        return self._stack_or_list(res)
    def generate_res_vec(self,
                query_descs: Union[np.ndarray, torch.Tensor],
                cache_id: Union[str, None]=None) -> torch.Tensor:
        """
            Given the query descriptors, generate the residual vectors
            of every descriptor to every cluster center. Call `fit`
            before using this method. Use this for only single images
            and with descriptors stacked. Use function
            `generate_multi_res_vec` for multiple images.
            Parameters:
            - query_descs:  Query descriptors of shape [n_q, desc_dim]
                            where 'n_q' is number of 'desc_dim'
                            dimensional descriptors in a query image.
            - cache_id:     If not None (and the cache is usable), the
                            residuals are loaded from the file of this
                            ID if it exists, and stored there
                            otherwise.
            Returns:
            - residuals:    Residual vector: shape [n_q, n_c, d]
        """
        assert self.kmeans is not None
        assert self.c_centers is not None
        use_cache = cache_id is not None and self.can_use_cache_vlad()
        r_file = f"{self.cache_dir}/{cache_id}_r.pt"
        if use_cache and os.path.isfile(r_file):
            return torch.load(r_file)
        # Compute residuals (all query to cluster): [q, c, d]
        descs = self._prepare_descs(query_descs)
        residuals = descs[:, None, :] - self.c_centers[None, :, :]
        if use_cache:
            # The cache ID may contain sub-folders
            cid_dir = f"{self.cache_dir}/"\
                    f"{os.path.split(cache_id)[0]}"
            if not os.path.isdir(cid_dir):
                os.makedirs(cid_dir)
                print(f"Created directory: {cid_dir}")
            torch.save(residuals, r_file)
        return residuals
    def generate_multi_res_vec(self,
            multi_query: Union[np.ndarray, torch.Tensor, list],
            cache_ids: Union[List[str], None]=None) \
            -> Union[torch.Tensor, list]:
        """
            Given query descriptors from multiple images, generate
            the residual vectors for them.
            Parameters:
            - multi_query:  Descriptors of shape [n_imgs, n_kpts, d]
                            There are 'n_imgs' and each image has
                            'n_kpts' keypoints, with 'd' dimensional
                            descriptor each. If a List (can then have
                            different number of keypoints in each
                            image), then the result is also a list.
            - cache_ids:    Cache IDs for the residuals. If None,
                            then no caching is done (stored or
                            retrieved). If a list, then the length
                            should be 'n_imgs' (one per image).
            Returns:
            - multi_res:    Residuals for the queries. Stacked if all
                            images have the same number of
                            descriptors, a list otherwise.
        """
        if cache_ids is None:
            cache_ids = [None] * len(multi_query)
        res = [self.generate_res_vec(q, c) \
                for (q, c) in zip(multi_query, cache_ids)]
        return self._stack_or_list(res)