Spaces:

englert
/

visdif

Configuration error

App Files Files

englert commited on Nov 1, 2022

Commit

ba93a7e

1 Parent(s): e839e64

update app.py #3

Browse files

Files changed (3) hide show

app.py +1 -1
fastdist2.py +1 -290
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -90,4 +90,4 @@ demo = gr.Interface(
             gr.components.Number(label="Downsample size")],
     outputs=gr.components.File(label="Zip"))
-demo.launch()

             gr.components.Number(label="Downsample size")],
     outputs=gr.components.File(label="Zip"))
+demo.launch(debug = True)

fastdist2.py CHANGED Viewed

@@ -1,144 +1,9 @@
-import math
 import numpy as np
-from numba import jit, prange, cuda, float32
 # https://github.com/talboger/fastdist
-@jit(nopython=True, fastmath=True)
-def cosine(u, v, w=None):
-    """
-    :purpose:
-    Computes the cosine similarity between two 1D arrays
-    Unlike scipy's cosine distance, this returns similarity, which is 1 - distance
-    :params:
-    u, v   : input arrays, both of shape (n,)
-    w      : weights at each index of u and v. array of shape (n,)
-             if no w is set, it is initialized as an array of ones
-             such that it will have no impact on the output
-    :returns:
-    cosine  : float, the cosine similarity between u and v
-    :example:
-    >>> from fastdist import fastdist
-    >>> import numpy as np
-    >>> u, v, w = np.random.RandomState(seed=0).rand(10000, 3).T
-    >>> fastdist.cosine(u, v, w)
-    0.7495065944399267
-    """
-    n = len(u)
-    num = 0
-    u_norm, v_norm = 0, 0
-    for i in range(n):
-        num += u[i] * v[i] * w[i]
-        u_norm += abs(u[i]) ** 2 * w[i]
-        v_norm += abs(v[i]) ** 2 * w[i]
-    denom = (u_norm * v_norm) ** (1 / 2)
-    return num / denom
-@jit(nopython=True, fastmath=True)
-def cosine_vector_to_matrix(u, m):
-    """
-    :purpose:
-    Computes the cosine similarity between a 1D array and rows of a matrix
-    :params:
-    u      : input vector of shape (n,)
-    m      : input matrix of shape (m, n)
-    :returns:
-    cosine vector  : np.array, of shape (m,) vector containing cosine similarity between u
-                     and the rows of m
-    :example:
-    >>> from fastdist import fastdist
-    >>> import numpy as np
-    >>> u = np.random.RandomState(seed=0).rand(10)
-    >>> m = np.random.RandomState(seed=0).rand(100, 10)
-    >>> fastdist.cosine_vector_to_matrix(u, m)
-    (returns an array of shape (100,))
-    """
-    norm = 0
-    for i in range(len(u)):
-        norm += abs(u[i]) ** 2
-    u = u / norm ** (1 / 2)
-    for i in range(m.shape[0]):
-        norm = 0
-        for j in range(len(m[i])):
-            norm += abs(m[i][j]) ** 2
-        m[i] = m[i] / norm ** (1 / 2)
-    return np.dot(u, m.T)
-@jit(nopython=True, fastmath=True)
-def cosine_matrix_to_matrix(a, b):
-    """
-    :purpose:
-    Computes the cosine similarity between the rows of two matrices
-    :params:
-    a, b   : input matrices of shape (m, n) and (k, n)
-             the matrices must share a common dimension at index 1
-    :returns:
-    cosine matrix  : np.array, an (m, k) array of the cosine similarity
-                     between the rows of a and b
-    :example:
-    >>> from fastdist import fastdist
-    >>> import numpy as np
-    >>> a = np.random.RandomState(seed=0).rand(10, 50)
-    >>> b = np.random.RandomState(seed=0).rand(100, 50)
-    >>> fastdist.cosine_matrix_to_matrix(a, b)
-    (returns an array of shape (10, 100))
-    """
-    for i in range(a.shape[0]):
-        norm = 0
-        for j in range(len(a[i])):
-            norm += abs(a[i][j]) ** 2
-        a[i] = a[i] / norm ** (1 / 2)
-    for i in range(b.shape[0]):
-        norm = 0
-        for j in range(len(b[i])):
-            norm += abs(b[i][j]) ** 2
-        b[i] = b[i] / norm ** (1 / 2)
-    return np.dot(a, b.T)
-@jit(nopython=True, fastmath=True)
-def euclidean(u, v):
-    """
-    :purpose:
-    Computes the Euclidean distance between two 1D arrays
-    :params:
-    u, v   : input arrays, both of shape (n,)
-    w      : weights at each index of u and v. array of shape (n,)
-             if no w is set, it is initialized as an array of ones
-             such that it will have no impact on the output
-    :returns:
-    euclidean : float, the Euclidean distance between u and v
-    :example:
-    >>> from fastdist import fastdist
-    >>> import numpy as np
-    >>> u, v, w = np.random.RandomState(seed=0).rand(10000, 3).T
-    >>> fastdist.euclidean(u, v, w)
-    28.822558591834163
-    """
-    n = len(u)
-    dist = 0
-    for i in range(n):
-        dist += abs(u[i] - v[i]) ** 2
-    return dist ** (1 / 2)
 @jit(nopython=True, fastmath=True)
 def euclidean_vector_to_matrix_distance(u, m):
     """
@@ -176,157 +41,3 @@ def euclidean_vector_to_matrix_distance(u, m):
         out[i] = dist ** (1 / 2)
     return out
-@cuda.jit
-def gpu_kernel_euclidean_vector_to_matrix_distance(u, m, u_dim0, m_dim0, out):
-    # Thread id in a 1D block
-    tx = cuda.threadIdx.x
-    # Block id in a 1D grid
-    ty = cuda.blockIdx.x
-    # Block width, i.e. number of threads per block
-    bw = cuda.blockDim.x
-    # Compute flattened index inside the array
-    pos = tx + ty * bw
-    if pos < m_dim0:  # Check array boundaries
-        dist = 0
-        for l in range(u_dim0):
-            d = abs(u[l] - m[pos][l])
-            dist += d * d
-        out[pos] = dist ** (1 / 2)
-def euclidean_vector_to_matrix_distance_gpu(u, m):
-    m_dim0 = m.shape[0]
-    u_dim0 = u.shape[0]
-    out = np.zeros((m_dim0), dtype=np.float32)
-    threadsperblock = 16
-    blockspergrid = (m_dim0 + (threadsperblock - 1)) // threadsperblock
-    gpu_kernel_euclidean_vector_to_matrix_distance[blockspergrid, threadsperblock](u, m, u_dim0, m_dim0, out)
-    return out
-# https://numba.readthedocs.io/en/stable/cuda/examples.html
-@cuda.jit
-def gpu_kernel_euclidean_matrix_to_matrix_distance_fast(A, B, C):
-    TPB = 16
-    # Define an array in the shared memory
-    # The size and type of the arrays must be known at compile time
-    sA = cuda.shared.array(shape=(TPB, TPB), dtype=float32)
-    sB = cuda.shared.array(shape=(TPB, TPB), dtype=float32)
-    x, y = cuda.grid(2)
-    tx = cuda.threadIdx.x
-    ty = cuda.threadIdx.y
-    bpg = cuda.gridDim.x  # blocks per grid
-    # Each thread computes one element in the result matrix.
-    # The dot product is chunked into dot products of TPB-long vectors.
-    tmp = float32(0.)
-    for i in range(bpg):
-        # Preload data into shared memory
-        sA[ty, tx] = 0
-        sB[ty, tx] = 0
-        if y < A.shape[0] and (tx + i * TPB) < A.shape[1]:
-            sA[ty, tx] = A[y, tx + i * TPB]
-        if x < B.shape[1] and (ty + i * TPB) < B.shape[0]:
-            sB[ty, tx] = B[ty + i * TPB, x]
-        # Wait until all threads finish preloading
-        cuda.syncthreads()
-        # Computes partial product on the shared memory
-        for j in range(TPB):
-            d = abs(sA[ty, j] - sB[j, tx])
-            tmp += d * d
-        # Wait until all threads finish computing
-        cuda.syncthreads()
-    if y < C.shape[0] and x < C.shape[1]:
-        C[y, x] = tmp ** (1 / 2)
-def euclidean_matrix_to_matrix_distance_gpu_fast(u, m):
-    u_dim0 = u.shape[0]
-    m_dim1 = m.shape[1]
-    # vec_dim = u.shape[1]
-    # assert vec_dim == m.shape[1]
-    out = np.zeros((u_dim0, m_dim1), dtype=np.float32)
-    threadsperblock = (16, 16)
-    grid_y_max = max(u.shape[0], m.shape[0])
-    grid_x_max = max(u.shape[1], m.shape[1])
-    blockspergrid_x = math.ceil(grid_x_max / threadsperblock[0])
-    blockspergrid_y = math.ceil(grid_y_max / threadsperblock[1])
-    blockspergrid = (blockspergrid_x, blockspergrid_y)
-    u_d = cuda.to_device(u)
-    m_d = cuda.to_device(m)
-    out_d = cuda.to_device(out)
-    gpu_kernel_euclidean_matrix_to_matrix_distance_fast[blockspergrid, threadsperblock](u_d, m_d, out_d)
-    out = out_d.copy_to_host()
-    return out
-@jit(cache=True, nopython=True, parallel=True, fastmath=True, boundscheck=False, nogil=True)
-def euclidean_matrix_to_matrix_distance(a, b):
-    """
-    :purpose:
-    Computes the distance between the rows of two matrices using any given metric
-    :params:
-    a, b   : input matrices either of shape (m, n) and (k, n)
-             the matrices must share a common dimension at index 1
-    metric : the function used to calculate the distance
-    metric_name : str of the function name. this is only used for
-                  the if statement because cosine similarity has its
-                  own function
-    :returns:
-    distance matrix  : np.array, an (m, k) array of the distance
-                       between the rows of a and b
-    :example:
-    >>> from fastdist import fastdist
-    >>> import numpy as np
-    >>> a = np.random.RandomState(seed=0).rand(10, 50)
-    >>> b = np.random.RandomState(seed=0).rand(100, 50)
-    >>> fastdist.matrix_to_matrix_distance(a, b, fastdist.cosine, "cosine")
-    (returns an array of shape (10, 100))
-    :note:
-    the cosine similarity uses its own function, cosine_matrix_to_matrix.
-    this is because normalizing the rows and then taking the dot product
-    of the two matrices heavily optimizes the computation. the other similarity
-    metrics do not have such an optimization, so we loop through them
-    """
-    n, m = a.shape[0], b.shape[0]
-    out = np.zeros((n, m), dtype=np.float32)
-    for i in prange(n):
-        for j in range(m):
-            dist = 0
-            for l in range(len(a[i])):
-                dist += abs(a[i][l] - b[j][l]) ** 2
-            out[i][j] = dist ** (1 / 2)
-    return out

 import numpy as np
+from numba import jit, prange
 # https://github.com/talboger/fastdist
 @jit(nopython=True, fastmath=True)
 def euclidean_vector_to_matrix_distance(u, m):
     """
         out[i] = dist ** (1 / 2)
     return out

requirements.txt CHANGED Viewed

@@ -5,3 +5,4 @@ numpy
 opencv-python
 umap-learn
 numba

 opencv-python
 umap-learn
 numba
+gradio