| """ |
| Compute search time needed for searching 100 new queries in a corpus containing 1M videos. |
| The reported performance was measured with PyTorch 1.4.0.dev20191109, Python 3.7 and CUDA 10.1. |
| |
| This experiment is simulated. |
| """ |
|
|
| import os |
| import time |
| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
| import numpy as np |
| from utils.basic_utils import save_json |
|
|
| import logging |
|
|
# Root-logger configuration: millisecond-resolution timestamps, level and logger name on every record.
_LOG_FORMAT = "%(asctime)s.%(msecs)03d:%(levelname)s:%(name)s - %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, datefmt=_LOG_DATE_FORMAT)

# Module-level logger used by the simulate_* functions below.
logger = logging.getLogger(__name__)


# Seed NumPy's global RNG so NumPy-generated fake data is reproducible across runs.
np.random.seed(1234)
|
|
|
|
def compare_l2dist_inner_product_time(n_videos=2000, d=256, n_query=1000, n_runs=10, n_warmup_runs=10):
    """Print the average GPU time of torch.cdist (L2 distance) vs. torch.mm (inner product).

    A random, row-wise L2-normalized float32 database of shape (n_videos, d) and query matrix of shape
    (n_query, d) are created on the GPU. Each operation is executed ``n_warmup_runs + n_runs`` times; the
    first ``n_warmup_runs`` timings are discarded and the mean of the remaining ones is printed.

    In some PyTorch/CUDA versions torch.cdist is very slow, which affects this comparison.
    See https://discuss.pytorch.org/t/cdist-vs-matmul/61682/5

    Returns:
        None. All results are printed to stdout.
    """
    def _unit_rows(n_rows):
        # Random (n_rows, d) matrix on the GPU with every row scaled to unit L2 norm.
        return F.normalize(torch.randn((n_rows, d), dtype=torch.float32).cuda(), dim=1, p=2)

    def _mean_gpu_time(op):
        # Time `op` with a CUDA sync on both sides so pending kernels are not attributed to the wrong run.
        elapsed = []
        for _ in range(n_warmup_runs + n_runs):
            torch.cuda.synchronize()
            tic = time.time()
            op()
            torch.cuda.synchronize()
            elapsed.append(time.time() - tic)
        return np.mean(elapsed[n_warmup_runs:])

    torch.cuda.synchronize()
    tic = time.time()
    fake_database = _unit_rows(n_videos)
    fake_query = _unit_rows(n_query)
    torch.cuda.synchronize()
    print("Construct fake database + query time {}".format(time.time() - tic))
    print("fake_database shape {} fake_query shape {}".format(fake_database.shape, fake_query.shape))

    avg_time_l2dist = _mean_gpu_time(lambda: torch.cdist(fake_query, fake_database, p=2))
    print("L2 Distance time {}".format(avg_time_l2dist))

    # torch.mm needs the database as (d, n_videos); the transpose is done once, outside the timed region.
    database_t = fake_database.transpose(0, 1)
    avg_time_ip = _mean_gpu_time(lambda: torch.mm(fake_query, database_t))
    print("Inner Product time {}".format(avg_time_ip))
|
|
|
|
def run_example():
    """Build a faiss.IndexFlatL2 over random vectors and print a few search results.

    In Python, faiss matrices are always represented as numpy arrays, and their dtype must be float32.

    The function first searches the first five database vectors against the index and prints the
    returned indices and distances, then times a search with the full query set and prints the
    neighbors of the first and last five queries.
    """
    import faiss

    dim = 64
    n_database = 100000
    n_queries = 10000
    np.random.seed(1234)

    def _make_vectors(count):
        # Uniform random float32 vectors whose first coordinate is shifted by row_index / 1000.
        vectors = np.random.random((count, dim)).astype('float32')
        vectors[:, 0] += np.arange(count) / 1000.
        return vectors

    # Order matters for reproducibility: database vectors are drawn before query vectors.
    xb = _make_vectors(n_database)
    xq = _make_vectors(n_queries)

    index = faiss.IndexFlatL2(dim)
    print(index.is_trained)
    index.add(xb)
    print(index.ntotal)

    k = 4
    # Search database vectors against the index that contains them.
    distances, neighbors = index.search(xb[:5], k)
    print(neighbors)
    print(distances)

    tic = time.time()
    distances, neighbors = index.search(xq, k)
    print("time elapsed {}".format(time.time() - tic))
    print(neighbors[:5])
    print(neighbors[-5:])
|
|
|
|
def simulate_mee_runtime(n_videos=1000000, d=256, n_query=100, max_neighbors=100, n_runs=5, n_warmup_runs=10):
    """Search over a database of shape [n_videos, d] with query of shape [n_query, d].
    For each query, return max_neighbors results.

    The database and queries are random (faiss.rand). The index is built with the faiss factory string
    "IVF4096,Flat" using the L2 metric; a flat L2 index moved to all GPUs is installed as its
    clustering index before training. After training, the database vectors are added to the index so
    that the timed search actually runs over n_videos entries.

    Args:
        n_videos: number of database vectors.
        d: vector dimensionality.
        n_query: number of query vectors searched per run.
        max_neighbors: number of results requested per query.
        n_runs: number of timed runs that are averaged.
        n_warmup_runs: number of initial runs whose timings are discarded.

    Returns:
        Mean search time (seconds) over the last ``n_runs`` runs, multiplied by 2.
    """
    import faiss

    torch.cuda.synchronize()
    st_time = time.time()
    fake_database = faiss.rand((n_videos, d))
    fake_query = faiss.rand((n_query, d))
    torch.cuda.synchronize()
    logger.info("Construct fake database + query time {}".format(time.time() - st_time))

    torch.cuda.synchronize()
    st_time = time.time()
    index = faiss.index_factory(d, "IVF4096,Flat", faiss.METRIC_L2)
    index_ivf = faiss.extract_index_ivf(index)
    clustering_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
    index_ivf.clustering_index = clustering_index
    torch.cuda.synchronize()
    logger.info("Build/Move to GPU? index time {}".format(time.time() - st_time))

    # Sync first, then start the clock, so pending GPU work is not billed to training
    # (same ordering as every other timed section in this function).
    torch.cuda.synchronize()
    st_time = time.time()
    index_ivf.train(fake_database)
    torch.cuda.synchronize()
    logger.info("Train index time {}".format(time.time() - st_time))

    # train() only fits the index; the vectors must still be added. Without this step the index is
    # empty and the loop below would time a search over zero vectors.
    # NOTE(review): this stores a copy of the database inside the index, so peak host memory grows
    # by roughly n_videos * d * 4 bytes.
    torch.cuda.synchronize()
    st_time = time.time()
    index_ivf.add(fake_database)
    torch.cuda.synchronize()
    logger.info("Add vectors to index time {}".format(time.time() - st_time))

    times = []
    for _ in range(n_warmup_runs+n_runs):
        torch.cuda.synchronize()
        st_time = time.time()
        index_ivf.search(fake_query, max_neighbors)  # results are irrelevant, only the latency is measured
        torch.cuda.synchronize()
        times.append(time.time() - st_time)
    # The measured time is doubled. Presumably this accounts for two retrieval streams, mirroring the
    # duplicated distance computation in the rerank simulations of this file — TODO confirm.
    avg_time = np.mean(times[n_warmup_runs:]) * 2
    logger.info("Avg searching time ({} runs) {}".format(n_runs, avg_time))
    return avg_time
|
|
|
|
def simulate_cal_rerank_time(n_moments=200, avg_n_clips_per_moment=7, d=256, n_query=100, max_neighbors=100,
                             n_runs=5, n_warmup_runs=10):
    """Time a simulated clip-level reranking step on the GPU.

    A random database of ``n_moments * avg_n_clips_per_moment`` clip vectors and ``n_query`` query vectors
    (both float32, dimension ``d``) is created on the GPU. Each run computes query-to-clip L2 distances,
    averages them over the clips of every moment, and selects the ``max_neighbors`` smallest moment
    distances per query.

    Args:
        n_moments: number of candidate moments to rerank.
        avg_n_clips_per_moment: number of clip vectors per moment.
        d: vector dimensionality.
        n_query: number of query vectors.
        max_neighbors: k passed to torch.topk; must not exceed ``n_moments``.
        n_runs: number of timed runs that are averaged.
        n_warmup_runs: number of initial runs whose timings are discarded.

    Returns:
        Mean time (seconds) of one reranking pass over the last ``n_runs`` runs.
    """
    # Synchronize before starting the clock so that pending GPU work (or CUDA initialisation, if this
    # is the first CUDA call) is not included in the construction time.
    torch.cuda.synchronize()
    st_time = time.time()
    fake_database = torch.randn((n_moments * avg_n_clips_per_moment, d), dtype=torch.float32).cuda()
    fake_query = torch.randn((n_query, d), dtype=torch.float32).cuda()
    torch.cuda.synchronize()
    logger.info("Construct fake database + query time {}".format(time.time() - st_time))

    times = []
    for _ in range(n_warmup_runs+n_runs):
        torch.cuda.synchronize()
        st_time = time.time()
        # The distance + per-moment mean is computed twice and the result is added to itself.
        # Presumably this simulates two feature streams whose scores are fused — TODO confirm.
        fake_dist = torch.cdist(fake_query, fake_database, p=2)  # (n_query, n_moments * avg_n_clips_per_moment)
        fake_dist = fake_dist.view(n_query, n_moments, avg_n_clips_per_moment).mean(2)
        fake_dist = torch.cdist(fake_query, fake_database, p=2)
        fake_dist = fake_dist.view(n_query, n_moments, avg_n_clips_per_moment).mean(2)
        fake_dist = fake_dist + fake_dist
        # torch.topk returns (values, indices), in that order.
        fake_top_dist, fake_top_indices = torch.topk(fake_dist, k=max_neighbors, dim=1, largest=False, sorted=True)
        torch.cuda.synchronize()
        times.append(time.time() - st_time)
    avg_time = np.mean(times[n_warmup_runs:])
    logger.info("searching time {}".format(avg_time))
    return avg_time
|
|
|
|
def simulate_mcn_rerank_time(n_moments=200, d=256, n_query=100, max_neighbors=100, n_runs=5, n_warmup_runs=10):
    """Time a simulated moment-level reranking step on the GPU.

    One random float32 vector per moment (``n_moments`` x ``d``) and ``n_query`` random query vectors are
    created on the GPU. Each run computes the query-to-moment L2 distances twice, adds the second result
    to itself, and takes the ``max_neighbors`` smallest distances per query.

    Returns:
        Mean time (seconds) of one pass, averaged over the runs that follow the ``n_warmup_runs`` warmups.
    """
    torch.cuda.synchronize()
    setup_start = time.time()
    moment_feats = torch.randn((n_moments, d), dtype=torch.float32).cuda()
    query_feats = torch.randn((n_query, d), dtype=torch.float32).cuda()
    torch.cuda.synchronize()
    logger.info("Construct fake database + query time {}".format(time.time() - setup_start))

    total_runs = n_warmup_runs+n_runs
    elapsed = []
    while len(elapsed) < total_runs:
        torch.cuda.synchronize()
        tic = time.time()
        # Two identical distance passes followed by a sum; presumably two fused score streams — TODO confirm.
        dist = torch.cdist(query_feats, moment_feats, p=2).view(n_query, n_moments)
        dist = torch.cdist(query_feats, moment_feats, p=2).view(n_query, n_moments)
        dist = dist + dist
        # torch.topk yields (values, indices); neither is used, only the latency matters.
        top_values, top_indices = torch.topk(dist, k=max_neighbors, dim=1, largest=False, sorted=True)
        torch.cuda.synchronize()
        elapsed.append(time.time() - tic)

    avg_time = np.mean(elapsed[n_warmup_runs:])
    logger.info("searching time {}".format(avg_time))
    return avg_time
|
|
|
|
def simulate_xml_rerank_time(n_videos=100, avg_n_clips_per_video=20, d=256, n_query=100, max_neighbors=100,
                             n_runs=5, n_warmup_runs=10):
    """Time the four stages of a simulated query-to-clip reranking pipeline on the GPU.

    Stages, each timed separately with a CUDA synchronize before and after:
        prod: query x clip matrix product (computed twice, second result added to itself),
              reshaped to (n_query * n_videos, clips_per_video)
        conv: 1D convolution (kernel size 5, 2 output channels) over the clip axis; channel 0 is kept
        triu: outer product of the convolved scores with themselves, upper-triangular part,
              flattened to one row per query
        topk: the ``max_neighbors`` largest entries per query

    Returns:
        dict mapping "conv", "prod", "topk", "triu" to the mean stage time (seconds) over the non-warmup
        runs, plus "all" holding the sum of those four means.
    """
    torch.cuda.synchronize()
    setup_start = time.time()
    clip_feats = torch.randn((d, n_videos*avg_n_clips_per_video), dtype=torch.float32).cuda()
    query_feats = torch.randn((n_query, d), dtype=torch.float32).cuda()
    conv = nn.Conv1d(in_channels=1, out_channels=2, kernel_size=5, stride=1, padding=2, bias=False).cuda()
    torch.cuda.synchronize()
    logger.info("Construct fake database + query time {}".format(time.time() - setup_start))

    # Key order is preserved in the returned/logged dict.
    stage_times = {"conv": [], "prod": [], "topk": [], "triu": []}

    def _timed(stage, fn):
        # Run `fn` between two CUDA syncs and record its wall-clock duration under `stage`.
        torch.cuda.synchronize()
        tic = time.time()
        out = fn()
        torch.cuda.synchronize()
        stage_times[stage].append(time.time() - tic)
        return out

    def _fused_product():
        # Two identical products followed by a sum; presumably two fused score streams — TODO confirm.
        scores = torch.mm(query_feats, clip_feats).view(n_query*n_videos, -1)
        scores = torch.mm(query_feats, clip_feats).view(n_query * n_videos, -1)
        return scores + scores

    for _ in range(n_warmup_runs+n_runs):
        scores = _timed("prod", _fused_product)
        scores = _timed("conv", lambda: conv(scores.unsqueeze(1))[:, 0, :])
        pair_scores = _timed(
            "triu", lambda: torch.triu(torch.einsum("ns,ne->nse", scores, scores)).view(n_query, -1))
        _timed("topk", lambda: torch.topk(pair_scores, k=max_neighbors, dim=1, largest=True, sorted=True))

    avg_time = {}
    for stage, samples in stage_times.items():
        avg_time[stage] = np.mean(samples[n_warmup_runs:])
    avg_time["all"] = np.sum(list(avg_time.values()))
    logger.info("searching time {}".format(avg_time))
    return avg_time
|
|
|
|
def get_storage_size(hsz, n_videos, n_clips_per_video, n_moments, n_total_clips_in_moments, dtype_size=4):
    """Compute and print the feature storage, in GB (1024**3 bytes), for each simulated method.

    Every entry is ``count * hsz * dtype_size * 2 / GB``; the "xml" entry uses
    ``n_videos * n_clips_per_video`` as its count and carries one additional factor of 2.

    Args:
        hsz: feature dimensionality.
        n_videos: number of videos ("mee" count).
        n_clips_per_video: clips per video (used by "xml").
        n_moments: number of moments ("mcn" count).
        n_total_clips_in_moments: total clips over all moments ("cal" count).
        dtype_size: bytes per element; float32, 4B.

    Returns:
        dict with keys "mee", "cal", "mcn", "xml" mapping to sizes in GB.
    """
    bytes_per_gb = 1024**3
    vector_counts = (
        ("mee", n_videos),
        ("cal", n_total_clips_in_moments),
        ("mcn", n_moments),
    )
    storage = {name: count * hsz * dtype_size * 2. / bytes_per_gb for name, count in vector_counts}
    storage["xml"] = n_videos * n_clips_per_video * hsz * dtype_size * 2. * 2. / bytes_per_gb
    print("storage (GB) {}".format(storage))
    return storage
|
|
|
|
def main_run():
    """Command-line entry point: run one simulation selected by ``--mode`` and save its result as JSON.

    Supported modes: "mee", "mee_torch", "xml_vr" (both run simulate_mee_runtime), "cal", "mcn", "xml"
    and "storage". The arguments passed to the simulation, together with the measured ``avg_time``
    (or the computed ``storage``) and the mode, are printed and written to
    ``<cache_dir>/<mode>_args.json``.

    Raises:
        NotImplementedError: if ``--mode`` is not one of the supported modes.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str, default="mee", help="which models to simulate")
    parser.add_argument("--cache_dir", type=str, default="baselines/profiling/cache", help="save index/results path")
    parser.add_argument("--n_runs", type=int, default=100, help="number of runs to calc average")
    parser.add_argument("--n_warmup_runs", type=int, default=10, help="number of warmup runs, to init cuda, etc.")
    args = parser.parse_args()

    # The numbers below were obtained from the first author of
    # `Temporal Localization of Moments in Video Collections with Natural Language`.
    k = 100
    n_query = 100
    n_videos = 1000000
    n_moments_per_video = 170
    hsz = 256
    n_clips_per_video = 20
    n_total_clips_in_moments = 1170946944
    n_moments = 170000000
    max_clips_per_proposal = 14  # not used by any simulation below; kept for reference
    avg_clips_per_proposal = 7

    mode = args.mode
    supported_modes = ("mee", "mee_torch", "xml_vr", "cal", "mcn", "xml", "storage")
    if mode not in supported_modes:
        raise NotImplementedError(
            "Unknown --mode {!r}; expected one of {}".format(mode, ", ".join(supported_modes)))

    # Create the output directory up front so that a long benchmark cannot be lost to a missing
    # directory when the result is saved. An empty cache_dir means "current directory".
    if args.cache_dir:
        os.makedirs(args.cache_dir, exist_ok=True)
    cfg_path = os.path.join(args.cache_dir, "{}_args.json".format(mode))

    n_runs = args.n_runs
    n_warmup_runs = args.n_warmup_runs
    torch.set_grad_enabled(False)  # inference-only timing; no autograd bookkeeping
    if mode in ["mee", "mee_torch"]:
        func_args = dict(n_videos=n_videos, d=hsz, n_query=n_query, max_neighbors=k,
                         n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_mee_runtime(**func_args)
    elif mode == "xml_vr":
        # Same search as "mee", but over one vector per clip instead of one per video.
        func_args = dict(n_videos=n_videos*n_clips_per_video, d=hsz, n_query=n_query,
                         max_neighbors=k, n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_mee_runtime(**func_args)
    elif mode == "cal":
        n_cal_rerank_videos = 100
        func_args = dict(n_moments=n_cal_rerank_videos*n_moments_per_video,
                         avg_n_clips_per_moment=avg_clips_per_proposal,
                         d=hsz, n_query=n_query, max_neighbors=k, n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_cal_rerank_time(**func_args)
    elif mode == "mcn":
        n_cal_rerank_videos = 100
        func_args = dict(n_moments=n_cal_rerank_videos*n_moments_per_video, d=hsz, n_query=n_query,
                         max_neighbors=k, n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_mcn_rerank_time(**func_args)
    elif mode == "xml":
        n_xml_videos = 100
        func_args = dict(n_videos=n_xml_videos, avg_n_clips_per_video=n_clips_per_video,
                         d=hsz, n_query=n_query, max_neighbors=k, n_runs=n_runs, n_warmup_runs=n_warmup_runs)
        avg_time = simulate_xml_rerank_time(**func_args)
    elif mode == "storage":
        func_args = dict(hsz=hsz, n_videos=n_videos, n_clips_per_video=n_clips_per_video,
                         n_moments=n_moments, n_total_clips_in_moments=n_total_clips_in_moments, dtype_size=4)
        storage = get_storage_size(**func_args)

    # Attach the result to the arguments so the saved JSON is self-describing.
    if mode == "storage":
        func_args["storage"] = storage
    else:
        func_args["avg_time"] = avg_time
    func_args["mode"] = mode
    print(func_args)
    save_json(func_args, cfg_path, save_pretty=True)
|
|
|
|
# Run the profiling command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main_run()
| |
|
|