Spaces:
Configuration error
Configuration error
| import math | |
| import numpy as np | |
| from numba import jit, prange, cuda, float32 | |
| # https://github.com/talboger/fastdist | |
def cosine(u, v, w=None):
    """
    :purpose:
    Computes the cosine similarity between two 1D arrays
    Unlike scipy's cosine distance, this returns similarity, which is 1 - distance
    :params:
    u, v : input arrays, both of shape (n,)
    w : weights at each index of u and v. array of shape (n,)
        if no w is set, it is initialized as an array of ones
        such that it will have no impact on the output
    :returns:
    cosine : float, the cosine similarity between u and v
    :example:
    >>> from fastdist import fastdist
    >>> import numpy as np
    >>> u, v, w = np.random.RandomState(seed=0).rand(10000, 3).T
    >>> fastdist.cosine(u, v, w)
    0.7495065944399267
    """
    n = len(u)
    # Bug fix: the docstring promises a default weight vector of ones, but the
    # original indexed w unconditionally and raised TypeError when w was None.
    if w is None:
        w = np.ones(n)
    num = 0.0
    u_norm, v_norm = 0.0, 0.0
    for i in range(n):
        num += u[i] * v[i] * w[i]
        u_norm += abs(u[i]) ** 2 * w[i]
        v_norm += abs(v[i]) ** 2 * w[i]
    denom = (u_norm * v_norm) ** (1 / 2)
    # NOTE(review): a zero-norm input still divides by zero, as in the original.
    return num / denom
def cosine_vector_to_matrix(u, m):
    """
    :purpose:
    Computes the cosine similarity between a 1D array and rows of a matrix
    :params:
    u : input vector of shape (n,)
    m : input matrix of shape (m, n)
    :returns:
    cosine vector : np.array, of shape (m,) vector containing cosine similarity between u
                    and the rows of m
    :example:
    >>> from fastdist import fastdist
    >>> import numpy as np
    >>> u = np.random.RandomState(seed=0).rand(10)
    >>> m = np.random.RandomState(seed=0).rand(100, 10)
    >>> fastdist.cosine_vector_to_matrix(u, m)
    (returns an array of shape (100,))
    """
    # Bug fix: the original normalized m row-by-row IN PLACE (m[i] = m[i] / norm),
    # silently mutating the caller's matrix (and truncating to zero for integer
    # dtypes). Division below produces new arrays, leaving the inputs untouched.
    u = u / np.sqrt(np.sum(np.abs(u) ** 2))
    row_norms = np.sqrt(np.sum(np.abs(m) ** 2, axis=1, keepdims=True))
    m = m / row_norms
    # With both operands unit-normalized, the dot product is the cosine similarity.
    return np.dot(u, m.T)
def cosine_matrix_to_matrix(a, b):
    """
    :purpose:
    Computes the cosine similarity between the rows of two matrices
    :params:
    a, b : input matrices of shape (m, n) and (k, n)
           the matrices must share a common dimension at index 1
    :returns:
    cosine matrix : np.array, an (m, k) array of the cosine similarity
                    between the rows of a and b
    :example:
    >>> from fastdist import fastdist
    >>> import numpy as np
    >>> a = np.random.RandomState(seed=0).rand(10, 50)
    >>> b = np.random.RandomState(seed=0).rand(100, 50)
    >>> fastdist.cosine_matrix_to_matrix(a, b)
    (returns an array of shape (10, 100))
    """
    # Bug fix: the original normalized both matrices row-by-row IN PLACE,
    # silently mutating the caller's arrays (and truncating integer dtypes).
    # Dividing by the per-row norms builds new arrays instead.
    a = a / np.sqrt(np.sum(np.abs(a) ** 2, axis=1, keepdims=True))
    b = b / np.sqrt(np.sum(np.abs(b) ** 2, axis=1, keepdims=True))
    # Unit rows dotted together give the (m, k) cosine-similarity matrix.
    return np.dot(a, b.T)
def euclidean(u, v, w=None):
    """
    :purpose:
    Computes the Euclidean distance between two 1D arrays
    :params:
    u, v : input arrays, both of shape (n,)
    w : weights at each index of u and v. array of shape (n,)
        if no w is set, it is initialized as an array of ones
        such that it will have no impact on the output
    :returns:
    euclidean : float, the Euclidean distance between u and v
    :example:
    >>> from fastdist import fastdist
    >>> import numpy as np
    >>> u, v, w = np.random.RandomState(seed=0).rand(10000, 3).T
    >>> fastdist.euclidean(u, v, w)
    28.822558591834163
    """
    # Bug fix: the docstring and doctest use a weight vector w, but the original
    # signature did not accept one. Added as an optional keyword (default ones),
    # so existing two-argument callers are unaffected.
    n = len(u)
    if w is None:
        w = np.ones(n)
    dist = 0.0
    for i in range(n):
        dist += abs(u[i] - v[i]) ** 2 * w[i]
    return dist ** (1 / 2)
def euclidean_vector_to_matrix_distance(u, m):
    """
    :purpose:
    Computes the Euclidean distance between a vector and each row of a matrix
    :params:
    u : input vector of shape (n,)
    m : input matrix of shape (m, n)
    :returns:
    distance vector : np.array of shape (m,), float32, containing the distance
                      between u and each row of m
    :example:
    >>> from fastdist import fastdist
    >>> import numpy as np
    >>> u = np.random.RandomState(seed=0).rand(10)
    >>> m = np.random.RandomState(seed=0).rand(100, 10)
    >>> fastdist.euclidean_vector_to_matrix_distance(u, m)
    (returns an array of shape (100,))
    """
    # Fixes: the old docstring documented `metric`/`metric_name` parameters this
    # function never had and named the wrong function in its example. The old
    # body also looped with numba.prange, which without an @jit(parallel=True)
    # decorator is just `range` — a vectorized NumPy expression is faster and
    # states the intent directly.
    diff = m - u  # broadcasts u across the rows of m
    return np.sqrt((diff * diff).sum(axis=1)).astype(np.float32)
@cuda.jit
def gpu_kernel_euclidean_vector_to_matrix_distance(u, m, u_dim0, m_dim0, out):
    # CUDA kernel: each thread computes the Euclidean distance between the
    # vector u (length u_dim0) and one row of m, writing the result to out[pos].
    # Bug fix: the @cuda.jit decorator was missing, so the
    # kernel[blockspergrid, threadsperblock](...) launch syntax used by the host
    # wrapper would fail (a plain Python function is not subscriptable).
    # Thread id in a 1D block
    tx = cuda.threadIdx.x
    # Block id in a 1D grid
    ty = cuda.blockIdx.x
    # Block width, i.e. number of threads per block
    bw = cuda.blockDim.x
    # Compute flattened index inside the array
    pos = tx + ty * bw
    if pos < m_dim0:  # Check array boundaries
        dist = 0
        for l in range(u_dim0):
            d = abs(u[l] - m[pos][l])
            dist += d * d
        out[pos] = dist ** (1 / 2)
def euclidean_vector_to_matrix_distance_gpu(u, m):
    # Host-side wrapper: launches the 1D CUDA kernel with one thread per row
    # of m and returns the float32 distance vector it fills in.
    rows = m.shape[0]
    dim = u.shape[0]
    result = np.zeros(rows, dtype=np.float32)
    threads_per_block = 16
    # Round up so every row gets a thread even when rows % threads_per_block != 0.
    blocks_per_grid = (rows + threads_per_block - 1) // threads_per_block
    gpu_kernel_euclidean_vector_to_matrix_distance[blocks_per_grid, threads_per_block](
        u, m, dim, rows, result
    )
    return result
| # https://numba.readthedocs.io/en/stable/cuda/examples.html | |
@cuda.jit
def gpu_kernel_euclidean_matrix_to_matrix_distance_fast(A, B, C):
    # Tiled CUDA kernel: thread (x, y) computes C[y, x], the Euclidean distance
    # between row y of A and column x of B, staging TPB x TPB tiles of both
    # operands in shared memory (same tiling scheme as the numba matmul example).
    # Bug fix: the @cuda.jit decorator was missing, so the
    # kernel[blockspergrid, threadsperblock](...) launch used by the host
    # wrapper would fail on a plain Python function.
    TPB = 16
    # Define an array in the shared memory
    # The size and type of the arrays must be known at compile time
    sA = cuda.shared.array(shape=(TPB, TPB), dtype=float32)
    sB = cuda.shared.array(shape=(TPB, TPB), dtype=float32)
    x, y = cuda.grid(2)
    tx = cuda.threadIdx.x
    ty = cuda.threadIdx.y
    bpg = cuda.gridDim.x  # blocks per grid
    # Each thread computes one element in the result matrix.
    # The dot product is chunked into dot products of TPB-long vectors.
    tmp = float32(0.)
    for i in range(bpg):
        # Preload data into shared memory; zero-fill keeps out-of-range tile
        # entries from contributing to the distance.
        sA[ty, tx] = 0
        sB[ty, tx] = 0
        if y < A.shape[0] and (tx + i * TPB) < A.shape[1]:
            sA[ty, tx] = A[y, tx + i * TPB]
        if x < B.shape[1] and (ty + i * TPB) < B.shape[0]:
            sB[ty, tx] = B[ty + i * TPB, x]
        # Wait until all threads finish preloading
        cuda.syncthreads()
        # Accumulate squared differences over this tile
        for j in range(TPB):
            d = abs(sA[ty, j] - sB[j, tx])
            tmp += d * d
        # Wait until all threads finish computing
        cuda.syncthreads()
    if y < C.shape[0] and x < C.shape[1]:
        C[y, x] = tmp ** (1 / 2)
def euclidean_matrix_to_matrix_distance_gpu_fast(u, m):
    # Host-side wrapper: copies u and m to the device, launches the tiled
    # shared-memory kernel above, and copies the float32 result back.
    u_dim0 = u.shape[0]
    m_dim1 = m.shape[1]
    # vec_dim = u.shape[1]
    # assert vec_dim == m.shape[1]
    # NOTE(review): out is shaped (u.shape[0], m.shape[1]), but the kernel pairs
    # row y of A with COLUMN x of B (it reads B[ty + i * TPB, x]). For pairwise
    # row-to-row distances the second argument would need to be pre-transposed
    # to (n, k) — which the commented-out assert above contradicts. Confirm the
    # expected layout of m against callers before relying on this output.
    out = np.zeros((u_dim0, m_dim1), dtype=np.float32)
    threadsperblock = (16, 16)
    # Grid sized to cover the larger extent of each axis across both inputs.
    grid_y_max = max(u.shape[0], m.shape[0])
    grid_x_max = max(u.shape[1], m.shape[1])
    blockspergrid_x = math.ceil(grid_x_max / threadsperblock[0])
    blockspergrid_y = math.ceil(grid_y_max / threadsperblock[1])
    blockspergrid = (blockspergrid_x, blockspergrid_y)
    # Explicit host->device transfers; out_d holds the kernel's result.
    u_d = cuda.to_device(u)
    m_d = cuda.to_device(m)
    out_d = cuda.to_device(out)
    gpu_kernel_euclidean_matrix_to_matrix_distance_fast[blockspergrid, threadsperblock](u_d, m_d, out_d)
    out = out_d.copy_to_host()
    return out
def euclidean_matrix_to_matrix_distance(a, b):
    """
    :purpose:
    Computes the Euclidean distance between each row of a and each row of b
    :params:
    a, b : input matrices of shape (m, n) and (k, n)
           the matrices must share a common dimension at index 1
    :returns:
    distance matrix : np.array, an (m, k) float32 array of the distance
                      between the rows of a and b
    :example:
    >>> from fastdist import fastdist
    >>> import numpy as np
    >>> a = np.random.RandomState(seed=0).rand(10, 50)
    >>> b = np.random.RandomState(seed=0).rand(100, 50)
    >>> fastdist.euclidean_matrix_to_matrix_distance(a, b)
    (returns an array of shape (10, 100))
    """
    # Fixes: the old docstring documented `metric`/`metric_name` parameters this
    # function never had and showed a call to a different function. The old
    # triple Python loop also used numba.prange, which without an
    # @jit(parallel=True) decorator is just `range`; vectorizing the inner two
    # loops per row of a removes the O(m*k*n) interpreted loop while keeping
    # memory bounded to one (k, n) temporary at a time.
    n_rows, k_rows = a.shape[0], b.shape[0]
    out = np.zeros((n_rows, k_rows), dtype=np.float32)
    for i in range(n_rows):
        diff = b - a[i]  # broadcasts row i of a against every row of b
        out[i] = np.sqrt((diff * diff).sum(axis=1))
    return out