osakemon committed
Commit 1e315b6 · verified · 1 Parent(s): 5c94492

Upload 42 files

Files changed (42)
  1. src/LICENSE +29 -0
  2. src/build/lib/loki/__init__.py +0 -0
  3. src/build/lib/loki/align.py +568 -0
  4. src/build/lib/loki/annotate.py +102 -0
  5. src/build/lib/loki/decompose.py +143 -0
  6. src/build/lib/loki/plot.py +435 -0
  7. src/build/lib/loki/plotting.py +435 -0
  8. src/build/lib/loki/predex.py +25 -0
  9. src/build/lib/loki/preprocess.py +324 -0
  10. src/build/lib/loki/retrieve.py +28 -0
  11. src/build/lib/loki/utilities.py +159 -0
  12. src/build/lib/loki/utils.py +278 -0
  13. src/dist/loki-0.0.1-py3-none-any.whl +0 -0
  14. src/dist/loki-0.0.1.tar.gz +3 -0
  15. src/loki.egg-info/PKG-INFO +23 -0
  16. src/loki.egg-info/SOURCES.txt +16 -0
  17. src/loki.egg-info/dependency_links.txt +1 -0
  18. src/loki.egg-info/requires.txt +13 -0
  19. src/loki.egg-info/top_level.txt +1 -0
  20. src/loki/__init__.py +0 -0
  21. src/loki/__pycache__/__init__.cpython-310.pyc +0 -0
  22. src/loki/__pycache__/__init__.cpython-39.pyc +0 -0
  23. src/loki/__pycache__/align.cpython-39.pyc +0 -0
  24. src/loki/__pycache__/annotate.cpython-39.pyc +0 -0
  25. src/loki/__pycache__/decompose.cpython-39.pyc +0 -0
  26. src/loki/__pycache__/deconv.cpython-39.pyc +0 -0
  27. src/loki/__pycache__/plot.cpython-39.pyc +0 -0
  28. src/loki/__pycache__/predex.cpython-39.pyc +0 -0
  29. src/loki/__pycache__/preprocess.cpython-39.pyc +0 -0
  30. src/loki/__pycache__/retrieve.cpython-39.pyc +0 -0
  31. src/loki/__pycache__/utils.cpython-39.pyc +0 -0
  32. src/loki/align.py +568 -0
  33. src/loki/annotate.py +102 -0
  34. src/loki/decompose.py +143 -0
  35. src/loki/plot.py +435 -0
  36. src/loki/predex.py +25 -0
  37. src/loki/preprocess.py +324 -0
  38. src/loki/requirements.txt +14 -0
  39. src/loki/retrieve.py +28 -0
  40. src/loki/utils.py +278 -0
  41. src/requirements.txt +14 -0
  42. src/setup.py +32 -0
src/LICENSE ADDED
@@ -0,0 +1,29 @@
+ BSD 3-Clause License
+
+ Copyright (c) 2025, Wang Lab
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this
+    list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holder nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
src/build/lib/loki/__init__.py ADDED
File without changes
src/build/lib/loki/align.py ADDED
@@ -0,0 +1,568 @@
+ import pycpd
+ from builtins import super
+ import numbers
+ from warnings import warn
+ import numpy as np
+ import cv2
+
+ class EMRegistration(object):
+     """
+     Expectation maximization point cloud registration.
+     Adapted from Pure Numpy Implementation of the Coherent Point Drift Algorithm:
+     https://github.com/siavashk/pycpd
+
+     Attributes
+     ----------
+     X: numpy array
+         NxD array of target points.
+
+     Y: numpy array
+         MxD array of source points.
+
+     TY: numpy array
+         MxD array of transformed source points.
+
+     sigma2: float (positive)
+         Initial variance of the Gaussian mixture model.
+
+     N: int
+         Number of target points.
+
+     M: int
+         Number of source points.
+
+     D: int
+         Dimensionality of source and target points.
+
+     iteration: int
+         The current iteration throughout registration.
+
+     max_iterations: int
+         Registration will terminate once the algorithm has taken this
+         many iterations.
+
+     tolerance: float (positive)
+         Registration will terminate once the difference between
+         consecutive objective function values falls within this tolerance.
+
+     w: float (between 0 and 1)
+         Contribution of the uniform distribution to account for outliers.
+         Valid values span 0 (inclusive) and 1 (exclusive).
+
+     q: float
+         The objective function value that represents the misalignment between source
+         and target point clouds.
+
+     diff: float (positive)
+         The absolute difference between the current and previous objective function values.
+
+     P: numpy array
+         MxN array of probabilities.
+         P[m, n] represents the probability that the m-th source point
+         corresponds to the n-th target point.
+
+     Pt1: numpy array
+         Nx1 column array.
+         Multiplication result between the transpose of P and a column vector of all 1s.
+
+     P1: numpy array
+         Mx1 column array.
+         Multiplication result between P and a column vector of all 1s.
+
+     Np: float (positive)
+         The sum of all elements in P.
+     """
+
+     def __init__(self, X, Y, sigma2=None, max_iterations=None, tolerance=None, w=None, *args, **kwargs):
+         if type(X) is not np.ndarray or X.ndim != 2:
+             raise ValueError(
+                 "The target point cloud (X) must be a 2D numpy array.")
+
+         if type(Y) is not np.ndarray or Y.ndim != 2:
+             raise ValueError(
+                 "The source point cloud (Y) must be a 2D numpy array.")
+
+         if X.shape[1] != Y.shape[1]:
+             raise ValueError(
+                 "Both point clouds need to have the same number of dimensions.")
+
+         if sigma2 is not None and (not isinstance(sigma2, numbers.Number) or sigma2 <= 0):
+             raise ValueError(
+                 "Expected a positive value for sigma2 instead got: {}".format(sigma2))
+
+         if max_iterations is not None and (not isinstance(max_iterations, numbers.Number) or max_iterations < 0):
+             raise ValueError(
+                 "Expected a positive integer for max_iterations instead got: {}".format(max_iterations))
+         elif isinstance(max_iterations, numbers.Number) and not isinstance(max_iterations, int):
+             warn("Received a non-integer value for max_iterations: {}. Casting to integer.".format(max_iterations))
+             max_iterations = int(max_iterations)
+
+         if tolerance is not None and (not isinstance(tolerance, numbers.Number) or tolerance < 0):
+             raise ValueError(
+                 "Expected a positive float for tolerance instead got: {}".format(tolerance))
+
+         if w is not None and (not isinstance(w, numbers.Number) or w < 0 or w >= 1):
+             raise ValueError(
+                 "Expected a value between 0 (inclusive) and 1 (exclusive) for w instead got: {}".format(w))
+
+         self.X = X
+         self.Y = Y
+         self.TY = Y
+         self.sigma2 = initialize_sigma2(X, Y) if sigma2 is None else sigma2
+         (self.N, self.D) = self.X.shape
+         (self.M, _) = self.Y.shape
+         self.tolerance = 0.001 if tolerance is None else tolerance
+         self.w = 0.0 if w is None else w
+         self.max_iterations = 100 if max_iterations is None else max_iterations
+         self.iteration = 0
+         self.diff = np.inf
+         self.q = np.inf
+         self.P = np.zeros((self.M, self.N))
+         self.Pt1 = np.zeros((self.N, ))
+         self.P1 = np.zeros((self.M, ))
+         self.PX = np.zeros((self.M, self.D))
+         self.Np = 0
+
+     def register(self, callback=lambda **kwargs: None):
+         """
+         Perform the EM registration.
+
+         Attributes
+         ----------
+         callback: function
+             A function that will be called after each iteration.
+             Can be used to visualize the registration process.
+
+         Returns
+         -------
+         self.TY: numpy array
+             MxD array of transformed source points.
+
+         registration_parameters:
+             Returned params dependent on registration method used.
+         """
+         self.transform_point_cloud()
+         while self.iteration < self.max_iterations and self.diff > self.tolerance:
+             self.iterate()
+             if callable(callback):
+                 kwargs = {'iteration': self.iteration,
+                           'error': self.q, 'X': self.X, 'Y': self.TY}
+                 callback(**kwargs)
+
+         return self.TY, self.get_registration_parameters()
+
+     def get_registration_parameters(self):
+         """
+         Placeholder for child classes.
+         """
+         raise NotImplementedError(
+             "Registration parameters should be defined in child classes.")
+
+     def update_transform(self):
+         """
+         Placeholder for child classes.
+         """
+         raise NotImplementedError(
+             "Updating transform parameters should be defined in child classes.")
+
+     def transform_point_cloud(self):
+         """
+         Placeholder for child classes.
+         """
+         raise NotImplementedError(
+             "Updating the source point cloud should be defined in child classes.")
+
+     def update_variance(self):
+         """
+         Placeholder for child classes.
+         """
+         raise NotImplementedError(
+             "Updating the Gaussian variance for the mixture model should be defined in child classes.")
+
+     def iterate(self):
+         """
+         Perform one iteration of the EM algorithm.
+         """
+         self.expectation()
+         self.maximization()
+         self.iteration += 1
+
+     def expectation(self):
+         """
+         Compute the expectation step of the EM algorithm.
+         """
+         P = np.sum((self.X[None, :, :] - self.TY[:, None, :])**2, axis=2)  # (M, N)
+         P = np.exp(-P/(2*self.sigma2))
+         c = (2*np.pi*self.sigma2)**(self.D/2)*self.w/(1. - self.w)*self.M/self.N
+
+         den = np.sum(P, axis=0, keepdims=True)  # (1, N)
+         den = np.clip(den, np.finfo(self.X.dtype).eps, None) + c
+
+         self.P = np.divide(P, den)
+         self.Pt1 = np.sum(self.P, axis=0)
+         self.P1 = np.sum(self.P, axis=1)
+         self.Np = np.sum(self.P1)
+         self.PX = np.matmul(self.P, self.X)
+
+     def maximization(self):
+         """
+         Compute the maximization step of the EM algorithm.
+         """
+         self.update_transform()
+         self.transform_point_cloud()
+         self.update_variance()
+
+
+ class DeformableRegistration(EMRegistration):
+     """
+     Deformable registration.
+     Adapted from Pure Numpy Implementation of the Coherent Point Drift Algorithm:
+     https://github.com/siavashk/pycpd
+
+     Attributes
+     ----------
+     alpha: float (positive)
+         Represents the trade-off between the goodness of maximum likelihood fit and regularization.
+
+     beta: float (positive)
+         Width of the Gaussian kernel.
+
+     low_rank: bool
+         Whether to use low rank approximation.
+
+     num_eig: int
+         Number of eigenvectors to use in the low-rank calculation.
+     """
+
+     def __init__(self, alpha=None, beta=None, low_rank=False, num_eig=100, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         if alpha is not None and (not isinstance(alpha, numbers.Number) or alpha <= 0):
+             raise ValueError(
+                 "Expected a positive value for regularization parameter alpha. Instead got: {}".format(alpha))
+
+         if beta is not None and (not isinstance(beta, numbers.Number) or beta <= 0):
+             raise ValueError(
+                 "Expected a positive value for the width of the coherent Gaussian kernel. Instead got: {}".format(beta))
+
+         self.alpha = 2 if alpha is None else alpha
+         self.beta = 2 if beta is None else beta
+         self.W = np.zeros((self.M, self.D))
+         self.G = gaussian_kernel(self.Y, self.beta)
+         self.low_rank = low_rank
+         self.num_eig = num_eig
+         if self.low_rank is True:
+             self.Q, self.S = low_rank_eigen(self.G, self.num_eig)
+             self.inv_S = np.diag(1./self.S)
+             self.S = np.diag(self.S)
+             self.E = 0.
+
+     def update_transform(self):
+         """
+         Calculate a new estimate of the deformable transformation.
+         See Eq. 22 of https://arxiv.org/pdf/0905.2635.pdf.
+         """
+         if self.low_rank is False:
+             A = np.dot(np.diag(self.P1), self.G) + \
+                 self.alpha * self.sigma2 * np.eye(self.M)
+             B = self.PX - np.dot(np.diag(self.P1), self.Y)
+             self.W = np.linalg.solve(A, B)
+
+         elif self.low_rank is True:
+             # Matlab code equivalent can be found here:
+             # https://github.com/markeroon/matlab-computer-vision-routines/tree/master/third_party/CoherentPointDrift
+             dP = np.diag(self.P1)
+             dPQ = np.matmul(dP, self.Q)
+             F = self.PX - np.matmul(dP, self.Y)
+
+             self.W = 1 / (self.alpha * self.sigma2) * (F - np.matmul(dPQ, (
+                 np.linalg.solve((self.alpha * self.sigma2 * self.inv_S + np.matmul(self.Q.T, dPQ)),
+                                 (np.matmul(self.Q.T, F))))))
+             QtW = np.matmul(self.Q.T, self.W)
+             self.E = self.E + self.alpha / 2 * np.trace(np.matmul(QtW.T, np.matmul(self.S, QtW)))
+
+     def transform_point_cloud(self, Y=None):
+         """
+         Update a point cloud using the new estimate of the deformable transformation.
+
+         Attributes
+         ----------
+         Y: numpy array, optional
+             Array of points to transform - use to predict on new set of points.
+             Best for predicting on new points not used to run initial registration.
+             If None, self.Y is used.
+
+         Returns
+         -------
+         If Y is None, returns None.
+         Otherwise, returns the transformed Y.
+         """
+         self.W[:, 2:] = 0
+         if Y is not None:
+             G = gaussian_kernel(X=Y, beta=self.beta, Y=self.Y)
+             return Y + np.dot(G, self.W)
+         else:
+             if self.low_rank is False:
+                 self.TY = self.Y + np.dot(self.G, self.W)
+
+             elif self.low_rank is True:
+                 self.TY = self.Y + np.matmul(self.Q, np.matmul(self.S, np.matmul(self.Q.T, self.W)))
+             return
+
+     def update_variance(self):
+         """
+         Update the variance of the mixture model using the new estimate of the deformable transformation.
+         See the update rule for sigma2 in Eq. 23 of https://arxiv.org/pdf/0905.2635.pdf.
+         """
+         qprev = self.sigma2
+
+         # The original CPD paper does not explicitly calculate the objective functional.
+         # This functional will include terms from both the negative log-likelihood and
+         # the Gaussian kernel used for regularization.
+         self.q = np.inf
+
+         xPx = np.dot(np.transpose(self.Pt1), np.sum(
+             np.multiply(self.X, self.X), axis=1))
+         yPy = np.dot(np.transpose(self.P1), np.sum(
+             np.multiply(self.TY, self.TY), axis=1))
+         trPXY = np.sum(np.multiply(self.TY, self.PX))
+
+         self.sigma2 = (xPx - 2 * trPXY + yPy) / (self.Np * self.D)
+
+         if self.sigma2 <= 0:
+             self.sigma2 = self.tolerance / 10
+
+         # Here we use the difference between the current and previous
+         # estimate of the variance as a proxy to test for convergence.
+         self.diff = np.abs(self.sigma2 - qprev)
+
+     def get_registration_parameters(self):
+         """
+         Return the current estimate of the deformable transformation parameters.
+
+         Returns
+         -------
+         self.G: numpy array
+             Gaussian kernel matrix.
+
+         self.W: numpy array
+             Deformable transformation matrix.
+         """
+         return self.G, self.W
+
+
+ def initialize_sigma2(X, Y):
+     """
+     Initialize the variance (sigma2).
+
+     Parameters
+     ----------
+     X: numpy array
+         NxD array of points for target.
+
+     Y: numpy array
+         MxD array of points for source.
+
+     Returns
+     -------
+     sigma2: float
+         Initial variance.
+     """
+     (N, D) = X.shape
+     (M, _) = Y.shape
+     diff = X[None, :, :] - Y[:, None, :]
+     err = diff ** 2
+     return np.sum(err) / (D * M * N)
+
+
+ def gaussian_kernel(X, beta, Y=None):
+     """
+     Computes a Gaussian (RBF) kernel matrix between two sets of vectors.
+
+     :param X: A numpy array of shape (n_samples_X, n_features) representing the first set of vectors.
+     :param beta: The standard deviation parameter for the Gaussian kernel. It controls the spread of the kernel.
+     :param Y: An optional numpy array of shape (n_samples_Y, n_features) representing the second set of vectors.
+               If None, the function computes the kernel between `X` and itself (i.e., the Gram matrix).
+     :return: A numpy array of shape (n_samples_X, n_samples_Y) representing the Gaussian kernel matrix.
+              Each element (i, j) in the matrix is computed as:
+              `exp(-||X[i] - Y[j]||^2 / (2 * beta^2))`
+     """
+
+     # If Y is not provided, use X for both sets, computing the kernel matrix between X and itself
+     if Y is None:
+         Y = X
+
+     # Compute the difference tensor between each pair of vectors in X and Y
+     # The resulting shape is (n_samples_X, n_samples_Y, n_features)
+     diff = X[:, None, :] - Y[None, :, :]
+
+     # Square the differences element-wise
+     diff = np.square(diff)
+
+     # Sum the squared differences across the feature dimension (axis 2) to get squared Euclidean distances
+     # The resulting shape is (n_samples_X, n_samples_Y)
+     diff = np.sum(diff, axis=2)
+
+     # Apply the Gaussian (RBF) kernel formula: exp(-||X[i] - Y[j]||^2 / (2 * beta^2))
+     kernel_matrix = np.exp(-diff / (2 * beta**2))
+
+     return kernel_matrix
+
+
+ def low_rank_eigen(G, num_eig):
+     """
+     Calculate the top `num_eig` eigenvectors and eigenvalues of a given Gaussian matrix G.
+     This function is useful for dimensionality reduction or when a low-rank approximation is needed.
+
+     :param G: A square matrix (numpy array) for which the eigen decomposition is to be performed.
+     :param num_eig: The number of top eigenvectors and eigenvalues to return, based on the magnitude of eigenvalues.
+     :return: A tuple containing:
+              - Q: A numpy array with shape (n, num_eig) containing the top `num_eig` eigenvectors of the matrix `G`.
+                Each column in `Q` corresponds to an eigenvector.
+              - S: A numpy array of shape (num_eig,) containing the top `num_eig` eigenvalues of the matrix `G`.
+     """
+
+     # Perform eigen decomposition on matrix G
+     # `S` will contain all the eigenvalues, and `Q` will contain the corresponding eigenvectors
+     S, Q = np.linalg.eigh(G)
+
+     # Sort eigenvalues in descending order based on their absolute values
+     # Get the indices of the top `num_eig` largest eigenvalues
+     eig_indices = list(np.argsort(np.abs(S))[::-1][:num_eig])
+
+     # Select the corresponding top eigenvectors based on the sorted indices
+     Q = Q[:, eig_indices]  # Q now contains the top `num_eig` eigenvectors
+
+     # Select the top `num_eig` eigenvalues based on the sorted indices
+     S = S[eig_indices]  # S now contains the top `num_eig` eigenvalues
+
+     return Q, S
+
+
+ def find_homography_translation_rotation(src_points, dst_points):
+     """
+     Find the homography between two sets of coordinates with only translation and rotation.
+
+     :param src_points: A numpy array of shape (n, 2) containing source coordinates.
+     :param dst_points: A numpy array of shape (n, 2) containing destination coordinates.
+     :return: A 3x3 homography matrix.
+     """
+     # Ensure the points are in the correct shape
+     assert src_points.shape == dst_points.shape
+     assert src_points.shape[1] == 2
+
+     # Calculate the centroids of the point sets
+     src_centroid = np.mean(src_points, axis=0)
+     dst_centroid = np.mean(dst_points, axis=0)
+
+     # Center the points around the centroids
+     centered_src_points = src_points - src_centroid
+     centered_dst_points = dst_points - dst_centroid
+
+     # Calculate the covariance matrix
+     H = np.dot(centered_src_points.T, centered_dst_points)
+
+     # Singular Value Decomposition (SVD)
+     U, S, Vt = np.linalg.svd(H)
+
+     # Calculate the rotation matrix
+     R = np.dot(Vt.T, U.T)
+
+     # Ensure a proper rotation matrix (det(R) = 1)
+     if np.linalg.det(R) < 0:
+         Vt[-1, :] *= -1
+         R = np.dot(Vt.T, U.T)
+
+     # Calculate the translation vector
+     t = dst_centroid - np.dot(R, src_centroid)
+
+     # Construct the homography matrix
+     homography_matrix = np.eye(3)
+     homography_matrix[0:2, 0:2] = R
+     homography_matrix[0:2, 2] = t
+
+     return homography_matrix
+
+
+ def apply_homography(coordinates, H):
+     """
+     Apply a 3x3 homography matrix to 2D coordinates.
+
+     :param coordinates: A numpy array of shape (n, 2) containing 2D coordinates.
+     :param H: A numpy array of shape (3, 3) representing the homography matrix.
+     :return: A numpy array of shape (n, 2) with transformed coordinates.
+     """
+     # Convert (x, y) to homogeneous coordinates (x, y, 1)
+     n = coordinates.shape[0]
+     homogeneous_coords = np.hstack((coordinates, np.ones((n, 1))))
+
+     # Apply the homography matrix
+     transformed_homogeneous = np.dot(homogeneous_coords, H.T)
+
+     # Convert back from homogeneous coordinates (x', y', w') to (x'/w', y'/w')
+     transformed_coords = transformed_homogeneous[:, :2] / transformed_homogeneous[:, [2]]
+
+     return transformed_coords
+
+
+ def align_tissue(ad_tar_coor, ad_src_coor, pca_comb_features, src_img, alpha=0.5):
+     """
+     Aligns the source coordinates to the target coordinates using Coherent Point Drift (CPD)
+     registration, and applies a homography transformation to warp the source coordinates accordingly.
+
+     :param ad_tar_coor: Numpy array of target coordinates to which the source will be aligned.
+     :param ad_src_coor: Numpy array of source coordinates that will be aligned to the target.
+     :param pca_comb_features: PCA-combined feature matrix used as additional features for the alignment process.
+     :param src_img: Source image to be warped based on the alignment.
+     :param alpha: Regularization parameter for CPD registration, default is 0.5.
+     :return:
+         - cpd_coor: The new source coordinates after CPD alignment.
+         - homo_coor: The source coordinates after applying the homography transformation.
+         - aligned_image: The source image warped based on the homography transformation.
+     """
+
+     # Normalize target and source coordinates to the range [0, 1]
+     ad_tar_coor_z = (ad_tar_coor - ad_tar_coor.min()) / (ad_tar_coor.max() - ad_tar_coor.min())
+     ad_src_coor_z = (ad_src_coor - ad_src_coor.min()) / (ad_src_coor.max() - ad_src_coor.min())
+
+     # Normalize PCA-combined features to the range [0, 1]
+     pca_comb_features_z = (pca_comb_features - pca_comb_features.min()) / (pca_comb_features.max() - pca_comb_features.min())
+
+     # Concatenate spatial and PCA-combined features for target and source
+     target = np.concatenate((ad_tar_coor_z, pca_comb_features_z[:ad_tar_coor.shape[0], :2]), axis=1)
+     source = np.concatenate((ad_src_coor_z, pca_comb_features_z[ad_tar_coor.shape[0]:, :2]), axis=1)
+
+     # Initialize and run the CPD registration (deformable with regularization)
+     reg = DeformableRegistration(X=target, Y=source, low_rank=True,
+                                  alpha=alpha,
+                                  max_iterations=int(1e9), tolerance=1e-9)
+
+     TY = reg.register()[0]  # TY contains the transformed source points
+
+     # Rescale the CPD-aligned coordinates back to the original range of target coordinates
+     cpd_coor = TY[:, :2] * (ad_tar_coor.max() - ad_tar_coor.min()) + ad_tar_coor.min()
+
+     # Find homography transformation based on CPD-aligned coordinates and apply it
+     h = find_homography_translation_rotation(ad_src_coor, cpd_coor)
+     homo_coor = apply_homography(ad_src_coor, h)
+
+     # Warp the source image using the computed homography
+     aligned_image = cv2.warpPerspective(src_img, h, (src_img.shape[1], src_img.shape[0]))
+
+     # Return the CPD-aligned coordinates, the homography-transformed coordinates, and the warped image
+     return cpd_coor, homo_coor, aligned_image
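The rigid-alignment step in `align.py` (`find_homography_translation_rotation` plus `apply_homography`) is a Kabsch-style best-fit rotation and translation packed into a 3x3 matrix. A minimal, self-contained sketch of that idea — the helper names below are restatements for illustration, not the committed API — recovers a known rotation-plus-shift:

```python
import numpy as np

def rigid_homography(src, dst):
    # Best-fit rotation + translation between matched 2D point sets,
    # via SVD of the cross-covariance (the same recipe as in align.py).
    src_c, dst_c = src.mean(axis=0), dst.mean(axis=0)
    H = (src - src_c).T @ (dst - dst_c)
    U, _, Vt = np.linalg.svd(H)
    R = Vt.T @ U.T
    if np.linalg.det(R) < 0:  # guard against reflections
        Vt[-1, :] *= -1
        R = Vt.T @ U.T
    M = np.eye(3)
    M[:2, :2] = R
    M[:2, 2] = dst_c - R @ src_c
    return M

def apply_h(coords, M):
    # Apply a 3x3 homography to (n, 2) points via homogeneous coordinates.
    h = np.hstack([coords, np.ones((len(coords), 1))]) @ M.T
    return h[:, :2] / h[:, [2]]

# A point set and a rotated + shifted copy of it
theta = np.pi / 6
R = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta),  np.cos(theta)]])
src = np.random.default_rng(0).random((50, 2))
dst = src @ R.T + np.array([3.0, -1.0])

M = rigid_homography(src, dst)
recovered = apply_h(src, M)
print(np.allclose(recovered, dst, atol=1e-8))
```

Because the fit is constrained to rotation and translation only, the recovered matrix maps the source points back onto the destination set essentially exactly for noise-free input.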
src/build/lib/loki/annotate.py ADDED
@@ -0,0 +1,102 @@
+ import numpy as np
+ import torch
+ from torch.nn import functional as F
+ import os
+ import scanpy as sc
+ import json
+ import cv2
+
+
+ def annotate_with_bulk(img_features, bulk_features, normalize=True, T=1, tensor=False):
+     """
+     Annotates a tissue image with similarity scores between image features and bulk RNA-seq features.
+
+     :param img_features: Feature matrix representing histopathology image features.
+     :param bulk_features: Feature vector representing bulk RNA-seq features.
+     :param normalize: Whether to normalize similarity scores, default=True.
+     :param T: Temperature parameter to control the sharpness of the softmax distribution. Higher values result in a smoother distribution.
+     :param tensor: Whether the features are torch tensors, default=False.
+
+     :return: An array or tensor containing the normalized similarity scores.
+     """
+
+     if tensor:
+         # Compute cosine similarity between image features and bulk RNA-seq features
+         cosine_similarity = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
+         similarity = cosine_similarity(img_features, bulk_features.unsqueeze(0))  # Shape: [n]
+
+         # Optional normalization by the square root of the feature dimensionality
+         if normalize:
+             normalization_factor = torch.sqrt(torch.tensor([bulk_features.shape[0]], dtype=torch.float))  # e.g. sqrt(768)
+             similarity = similarity / normalization_factor
+
+         # Reshape and apply temperature scaling for softmax
+         similarity = similarity.unsqueeze(0)  # Shape: [1, n]
+         similarity = similarity / T  # Control distribution sharpness
+
+         # Convert similarity scores to a probability distribution using softmax
+         similarity = torch.nn.functional.softmax(similarity, dim=-1)  # Shape: [1, n]
+
+     else:
+         # Compute dot-product similarity for non-tensor mode
+         similarity = np.dot(img_features.T, bulk_features)
+
+         # Apply a softmax normalization, shifting by the maximum for numerical stability
+         max_similarity = np.max(similarity)
+         similarity = np.exp(similarity - max_similarity) / np.sum(np.exp(similarity - max_similarity))
+
+         # Normalize similarity scores to the [0, 1] range for interpretation
+         similarity = (similarity - np.min(similarity)) / (np.max(similarity) - np.min(similarity))
+
+     return similarity
+
+
+ def annotate_with_marker_genes(classes, image_embeddings, all_text_embeddings):
+     """
+     Annotates a tissue image with similarity scores between image features and marker gene features.
+
+     :param classes: A list or array of tissue type labels.
+     :param image_embeddings: A numpy array or torch tensor of image embeddings (shape: [n_images, embedding_dim]).
+     :param all_text_embeddings: A numpy array or torch tensor of text embeddings of the marker genes
+                                 (shape: [n_classes, embedding_dim]).
+
+     :return:
+         - dot_similarity: The matrix of dot product similarities between image embeddings and text embeddings.
+         - pred_class: The predicted tissue type for the image based on the highest similarity score.
+     """
+
+     # Calculate dot product similarity between image embeddings and text embeddings
+     # This results in a similarity matrix of shape [n_images, n_classes]
+     dot_similarity = image_embeddings @ all_text_embeddings.T
+
+     # Find the class with the highest similarity for each image
+     # Use argmax to identify the index of the highest similarity score
+     pred_class = classes[dot_similarity.argmax()]
+
+     return dot_similarity, pred_class
+
+
+ def load_image_annotation(image_path):
+     """
+     Loads an image with annotation.
+
+     :param image_path: The file path to the image.
+
+     :return: The processed image as a uint8 array with channels swapped from OpenCV's BGR order.
+     """
+
+     # Load the image from the specified file path using OpenCV (BGR channel order)
+     image = cv2.imread(image_path)
+
+     # Swap the channel order; OpenCV loads images as BGR, and RGB2BGR/BGR2RGB perform the same swap
+     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+     # Ensure the image is of type uint8 for downstream image processing libraries
+     image = image.astype(np.uint8)
+
+     return image
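The non-tensor branch of `annotate_with_bulk` reduces to a dot product, a numerically stable softmax, and min-max scaling. A standalone sketch of just that arithmetic — the array sizes here are hypothetical toy dimensions, not real OmiCLIP embeddings:

```python
import numpy as np

def bulk_similarity(img_features, bulk_features):
    # Mirrors the non-tensor path of annotate_with_bulk:
    # dot-product similarity, stable softmax, then min-max scaling to [0, 1].
    sim = img_features.T @ bulk_features      # one score per image patch
    sim = np.exp(sim - sim.max())             # shift by max for stability
    sim = sim / sim.sum()                     # softmax over patches
    return (sim - sim.min()) / (sim.max() - sim.min())

rng = np.random.default_rng(1)
img_features = rng.random((768, 5))   # 768-dim embeddings for 5 patches
bulk_features = rng.random(768)       # one bulk RNA-seq embedding

scores = bulk_similarity(img_features, bulk_features)
print(scores.shape)  # (5,)
```

After min-max scaling, the least-similar patch scores exactly 0 and the most-similar exactly 1, which is what makes the scores directly comparable across a slide.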
src/build/lib/loki/decompose.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ import pandas as pd
+ import tangram as tg
+ import numpy as np
+ import torch
+ import anndata
+ from sklearn.decomposition import PCA
+ from sklearn.neighbors import NearestNeighbors
+
+
+ def generate_feature_ad(ad_expr, feature_path, sc=False):
+     """
+     Generates an AnnData object with OmiCLIP text or image embeddings.
+
+     :param ad_expr: AnnData object containing metadata for the dataset.
+     :param feature_path: Path to the CSV file containing the features to be loaded.
+     :param sc: Boolean flag indicating whether to copy single-cell metadata or ST metadata. Default is False (ST).
+     :return: A new AnnData object with the loaded features and relevant metadata from ad_expr.
+     """
+
+     # Load features from the CSV file, keeping only the columns that match the cells/spots in ad_expr.obs.index
+     features = pd.read_csv(feature_path, index_col=0)[ad_expr.obs.index]
+
+     # Create a new AnnData object with the features, transposed so that cells/spots are rows
+     feature_ad = anndata.AnnData(features.T)
+
+     # Copy relevant metadata from ad_expr based on the sc flag
+     if sc:
+         # If the data is single-cell (sc), copy the metadata from ad_expr.obs
+         feature_ad.obs = ad_expr.obs.copy()
+     else:
+         # If the data is spatial, copy the 'cell_num' column, the 'spatial' info, and the spatial coordinates
+         feature_ad.obs['cell_num'] = ad_expr.obs['cell_num'].copy()
+         feature_ad.uns['spatial'] = ad_expr.uns['spatial'].copy()
+         feature_ad.obsm['spatial'] = ad_expr.obsm['spatial'].copy()
+
+     return feature_ad
+
+
+ def normalize_percentile(df, cols, min_percentile=5, max_percentile=95):
+     """
+     Clips and normalizes the specified columns of a DataFrame based on percentile thresholds,
+     transforming their values to the [0, 1] range.
+
+     :param df: A pandas DataFrame containing the columns to normalize.
+     :type df: pandas.DataFrame
+     :param cols: A list of column names in `df` that should be normalized.
+     :type cols: list[str]
+     :param min_percentile: The lower percentile used for clipping (defaults to 5).
+     :type min_percentile: float
+     :param max_percentile: The upper percentile used for clipping (defaults to 95).
+     :type max_percentile: float
+     :return: The same DataFrame with the specified columns clipped and normalized.
+     :rtype: pandas.DataFrame
+     """
+
+     # Iterate over each column that needs to be normalized
+     for col in cols:
+         # Compute the lower and upper values at the given percentiles
+         min_val = np.percentile(df[col], min_percentile)
+         max_val = np.percentile(df[col], max_percentile)
+
+         # Clip the column's values between these percentile thresholds
+         df[col] = np.clip(df[col], min_val, max_val)
+
+         # Min-max normalize to scale the clipped values to the [0, 1] range
+         df[col] = (df[col] - min_val) / (max_val - min_val)
+
+     return df
+
+
+ def cell_type_decompose(sc_ad, st_ad, cell_type_col='cell_type', NMS_mode=False, major_types=None, min_percentile=5, max_percentile=95):
+     """
+     Performs cell type decomposition on spatial data (ST or image) with single-cell data.
+
+     :param sc_ad: AnnData object containing single-cell metadata.
+     :param st_ad: AnnData object containing spatial (ST or image) metadata.
+     :param cell_type_col: The column name in `sc_ad.obs` that contains cell type annotations. Default is 'cell_type'.
+     :param NMS_mode: If True, normalize the predicted fractions of `major_types` and keep only the top-scoring type per spot (non-maximum suppression). Default is False.
+     :param major_types: List of major cell types to keep when `NMS_mode` is True.
+     :param min_percentile: Lower percentile used when normalizing predictions in NMS mode. Default is 5.
+     :param max_percentile: Upper percentile used when normalizing predictions in NMS mode. Default is 95.
+     :return: The spatial AnnData object with projected cell type annotations.
+     """
+
+     # Preprocessing with Tangram (tg): match genes between the single-cell and spatial data
+     tg.pp_adatas(sc_ad, st_ad, genes=None)
+
+     # Map single-cell data to spatial data using Tangram's "map_cells_to_space" function
+     ad_map = tg.map_cells_to_space(
+         sc_ad, st_ad,
+         mode="clusters",              # Map based on clusters (cell types)
+         cluster_label=cell_type_col,  # Column in `sc_ad.obs` representing cell type
+         device='cpu',                 # Run on CPU (or 'cuda' if a GPU is available)
+         scale=False,                  # Don't scale data (can be set to True if needed)
+         density_prior='uniform',      # Assume a uniform prior over spot cell densities
+         random_state=10,              # Set random state for reproducibility
+         verbose=False,                # Disable verbose output for cleaner logging
+     )
+
+     # Project cell type annotations from the single-cell data onto the spatial data
+     tg.project_cell_annotations(ad_map, st_ad, annotation=cell_type_col)
+
+     if NMS_mode:
+         # Clip and normalize the predicted fractions of the major cell types
+         st_ad.obs = normalize_percentile(st_ad.obsm['tangram_ct_pred'], major_types, min_percentile, max_percentile)
+
+         st_ad_binary = st_ad.obsm['tangram_ct_pred'][major_types].copy()
+         # Retain the max value in each row and set the rest to 0
+         st_ad.obs[major_types] = st_ad_binary.where(st_ad_binary.eq(st_ad_binary.max(axis=1), axis=0), other=0)
+
+     return st_ad  # Return the spatial AnnData object with the projected annotations
+
+
+ def assign_cells_to_spots(cell_locs, spot_locs, patch_size=16):
+     """
+     Assigns cells to spots based on their spatial coordinates. Each cell within the assignment
+     radius (half the patch size) of a spot is assigned to that spot.
+
+     :param cell_locs: Numpy array of shape (n_cells, 2) with the x, y coordinates of the cells.
+     :param spot_locs: Numpy array of shape (n_spots, 2) with the x, y coordinates of the spots.
+     :param patch_size: The diameter of the spot patch; the radius used for assignment is half of this value.
+     :return: A sparse matrix where each row corresponds to a cell and each column corresponds to a spot.
+              The value is 1 if the cell is assigned to that spot, 0 otherwise.
+     """
+     # Initialize the NearestNeighbors model with a radius equal to half the patch size
+     neigh = NearestNeighbors(radius=patch_size * 0.5)
+
+     # Fit the model on the spot locations
+     neigh.fit(spot_locs)
+
+     # Build the radius-neighbors graph: a sparse matrix with cells as rows and spots as
+     # columns, where a 1 indicates that the cell falls within the spot's radius
+     A = neigh.radius_neighbors_graph(cell_locs, mode='connectivity')
+
+     return A
+
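The clip-and-rescale logic inside `normalize_percentile` can be exercised standalone with plain NumPy. This sketch uses a hypothetical `clip_and_rescale` helper (not part of loki) to show how an extreme outlier is clamped to the percentile band before min-max scaling:

```python
import numpy as np

def clip_and_rescale(values, min_percentile=5, max_percentile=95):
    """Clip values to the given percentile band, then min-max scale to [0, 1]."""
    lo = np.percentile(values, min_percentile)
    hi = np.percentile(values, max_percentile)
    clipped = np.clip(values, lo, hi)
    return (clipped - lo) / (hi - lo)

scores = np.array([0.0, 1.0, 2.0, 3.0, 100.0])  # one extreme outlier
scaled = clip_and_rescale(scores)

# The outlier no longer dominates: every value lands in [0, 1].
print(scaled.min(), scaled.max())
```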
src/build/lib/loki/plot.py ADDED
@@ -0,0 +1,435 @@
+ import matplotlib.pyplot as plt
+ from pathlib import Path
+ import json
+ import cv2
+ from matplotlib import cm
+ import pandas as pd
+ import numpy as np
+ from tqdm import tqdm
+
+
+ def plot_alignment(ad_tar_coor, ad_src_coor, homo_coor, pca_hex_comb, tar_features, shift=300, s=0.8, boundary_line=True):
+     """
+     Plots the target coordinates and the alignment of the source coordinates.
+
+     :param ad_tar_coor: Numpy array of target coordinates to be plotted in the first subplot.
+     :param ad_src_coor: Numpy array of source coordinates to be plotted in the second subplot.
+     :param homo_coor: Numpy array of aligned source coordinates to be plotted in the third subplot.
+     :param pca_hex_comb: Color values (e.g., PCA or hex values) for plotting the coordinates.
+     :param tar_features: Feature matrix for the target, used to split color values between target and source data.
+     :param shift: Padding added to the plot limits around the coordinates for better visualization. Default is 300.
+     :param s: Marker size for the scatter plot points. Default is 0.8.
+     :param boundary_line: Boolean indicating whether to draw boundary lines (horizontal and vertical). Default is True.
+     :return: Displays the alignment plot of the target, source, and aligned source coordinates.
+     """
+
+     # Create a figure with three subplots, adjusting size and resolution
+     plt.figure(figsize=(10, 3), dpi=300)
+
+     # First subplot: target coordinates
+     plt.subplot(1, 3, 1)
+     plt.scatter(ad_tar_coor[:, 0], ad_tar_coor[:, 1], marker='o', s=s, c=pca_hex_comb[:len(tar_features.T)])
+     # Set plot limits based on the minimum and maximum target coordinates, with extra padding from 'shift'
+     plt.xlim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+     plt.ylim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+
+     # Second subplot: source coordinates
+     plt.subplot(1, 3, 2)
+     plt.scatter(ad_src_coor[:, 0], ad_src_coor[:, 1], marker='o', s=s, c=pca_hex_comb[len(tar_features.T):])
+     # Use the same limits as the target coordinates so the subplots are directly comparable
+     plt.xlim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+     plt.ylim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+
+     # Third subplot: aligned source coordinates
+     plt.subplot(1, 3, 3)
+     plt.scatter(homo_coor[:, 0], homo_coor[:, 1], marker='o', s=s, c=pca_hex_comb[len(tar_features.T):])
+     # Maintain the same plot limits across all subplots for a uniform comparison
+     plt.xlim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+     plt.ylim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+
+     # Optionally draw boundary lines at the minimum x and y values of the target coordinates
+     if boundary_line:
+         plt.axvline(x=ad_tar_coor[:, 0].min(), color='black')  # Vertical line at the minimum x of the target coordinates
+         plt.axhline(y=ad_tar_coor[:, 1].min(), color='black')  # Horizontal line at the minimum y of the target coordinates
+
+     # Remove the axis labels and ticks from the current subplot for a cleaner appearance
+     plt.axis('off')
+
+     # Display the plot
+     plt.show()
+
+
+ def plot_alignment_with_img(ad_tar_coor, ad_src_coor, homo_coor, tar_img, src_img, aligned_image, pca_hex_comb, tar_features):
+     """
+     Plots the target coordinates and the alignment of the source coordinates with their respective images in the background.
+
+     :param ad_tar_coor: Numpy array of target coordinates to be plotted in the first and third subplots.
+     :param ad_src_coor: Numpy array of source coordinates to be plotted in the second subplot.
+     :param homo_coor: Numpy array of aligned source coordinates to be plotted in the third subplot.
+     :param tar_img: Image associated with the target coordinates, used as the background in the first subplot.
+     :param src_img: Image associated with the source coordinates, used as the background in the second subplot.
+     :param aligned_image: Image associated with the aligned coordinates, used as the background in the third subplot.
+     :param pca_hex_comb: Color values (e.g., PCA or hex values) for plotting the coordinates.
+     :param tar_features: Feature matrix for the target, used to split color values between target and source data.
+     :return: Displays the alignment plot of the target, source, and aligned source coordinates with their associated images.
+     """
+
+     # Create a figure with three subplots and set the size and resolution
+     plt.figure(figsize=(10, 8), dpi=150)
+
+     # First subplot: target coordinates with the target image as the background
+     plt.subplot(1, 3, 1)
+     # Scatter plot for the target coordinates with transparency and small marker size
+     plt.scatter(ad_tar_coor[:, 0], ad_tar_coor[:, 1], marker='o', alpha=0.8, s=1, c=pca_hex_comb[:len(tar_features.T)])
+     # Overlay the target image with some transparency (alpha = 0.3)
+     plt.imshow(tar_img, origin='lower', alpha=0.3)
+
+     # Second subplot: source coordinates with the source image as the background
+     plt.subplot(1, 3, 2)
+     # Scatter plot for the source coordinates with transparency and small marker size
+     plt.scatter(ad_src_coor[:, 0], ad_src_coor[:, 1], marker='o', alpha=0.8, s=1, c=pca_hex_comb[len(tar_features.T):])
+     # Overlay the source image with some transparency (alpha = 0.3)
+     plt.imshow(src_img, origin='lower', alpha=0.3)
+
+     # Third subplot: both the target and aligned source coordinates with the aligned image as the background
+     plt.subplot(1, 3, 3)
+     # Scatter plot for the target coordinates with lower opacity (alpha = 0.2)
+     plt.scatter(ad_tar_coor[:, 0], ad_tar_coor[:, 1], marker='o', alpha=0.2, s=1, c=pca_hex_comb[:len(tar_features.T)])
+     # Scatter plot for the aligned coordinates with a '+' marker and the same color mapping
+     plt.scatter(homo_coor[:, 0], homo_coor[:, 1], marker='+', s=1, c=pca_hex_comb[len(tar_features.T):])
+     # Overlay the aligned image with some transparency (alpha = 0.3)
+     plt.imshow(aligned_image, origin='lower', alpha=0.3)
+
+     # Turn off the axis of the current subplot for a cleaner visual output
+     plt.axis('off')
+
+     # Display the plots
+     plt.show()
+
+
+ def draw_polygon(image, polygon, color='k', thickness=2):
+     """
+     Draws one or more polygons on the given image.
+
+     :param image: The image on which to draw the polygons (as a numpy array).
+     :param polygon: A list of polygons, where each polygon is a list of (x, y) coordinate tuples.
+     :param color: A string or list of strings representing the color(s) for each polygon.
+                   If a single color is provided, it is applied to all polygons. Default is 'k' (black).
+     :param thickness: An integer or list of integers representing the thickness of the polygon borders.
+                       If a single value is provided, it is applied to all polygons. Default is 2.
+     :return: The image with the polygons drawn on it.
+     """
+
+     # If `color` is a single value (string), expand it to a list with one entry per polygon
+     if not isinstance(color, list):
+         color = [color] * len(polygon)
+
+     # Loop through each polygon in the list, along with its corresponding color
+     for i, poly in enumerate(polygon):
+         # Convert the color from a string format (e.g., 'k' or '#ff0000') to an RGB tuple
+         c = color_string_to_rgb(color[i])
+
+         # Get the thickness for the current polygon (if a list is provided, use the corresponding entry)
+         t = thickness[i] if isinstance(thickness, list) else thickness
+
+         # Convert the polygon coordinates to a numpy array of integers and reshape to
+         # OpenCV's expected input format: (number of points, 1, 2)
+         poly = np.array(poly, np.int32).reshape((-1, 1, 2))
+
+         # Draw the polygon with OpenCV; `isClosed=True` connects the start and end points
+         image = cv2.polylines(image, [poly], isClosed=True, color=c, thickness=t)
+
+     return image
+
+
+ def blend_images(image1, image2, alpha=0.5):
+     """
+     Blends two images together.
+
+     :param image1: Background image, a numpy array of shape (H, W, 3), where H is height, W is width, and 3 is the RGB channels.
+     :param image2: Foreground image, a numpy array of the same shape as image1.
+     :param alpha: Blending factor, a float between 0 and 1, giving the weight of image1 in the blend:
+                   0 shows only image2, 1 shows only image1. Default is 0.5 (equal blending).
+     :return: The blended image, computed per pixel as `alpha * image1 + (1 - alpha) * image2`.
+     """
+
+     # cv2.addWeighted weights image1 by 'alpha' and image2 by '1 - alpha'
+     blended = cv2.addWeighted(image1, alpha, image2, 1 - alpha, 0)
+
+     return blended
+
+
+ def color_string_to_rgb(color_string):
+     """
+     Converts a color string to an RGB tuple.
+
+     :param color_string: A string representing the color, either in hexadecimal form (e.g., '#ff0000') or
+                          a shorthand character for basic colors (e.g., 'k' for black, 'r' for red).
+     :return: A tuple (r, g, b) of integers between 0 and 255.
+     :raises ValueError: If the color string is not recognized.
+     """
+
+     # Remove any spaces in the color string
+     color_string = color_string.replace(' ', '')
+
+     # If the string starts with '#', it's a hexadecimal color, so strip the '#'
+     if color_string.startswith('#'):
+         color_string = color_string[1:]
+     else:
+         # Handle shorthand single-letter color codes by converting them to hex values
+         if color_string == 'k':    # Black
+             color_string = '000000'
+         elif color_string == 'r':  # Red
+             color_string = 'ff0000'
+         elif color_string == 'g':  # Green
+             color_string = '00ff00'
+         elif color_string == 'b':  # Blue
+             color_string = '0000ff'
+         elif color_string == 'w':  # White
+             color_string = 'ffffff'
+         else:
+             # Raise an error if the color string is not recognized
+             raise ValueError(f"Unknown color string {color_string}")
+
+     # Convert each pair of hex digits to the red, green, and blue values
+     r = int(color_string[:2], 16)
+     g = int(color_string[2:4], 16)
+     b = int(color_string[4:], 16)
+
+     # Return the RGB values as a tuple
+     return (r, g, b)
+
+
+ def plot_heatmap(
+     coor,
+     similarity,
+     image_path=None,
+     patch_size=(256, 256),
+     save_path=None,
+     downsize=32,
+     cmap='turbo',
+     smooth=False,
+     boxes=None,
+     box_color='k',
+     box_thickness=2,
+     polygons=None,
+     polygons_color='k',
+     polygons_thickness=2,
+     image_alpha=0.5
+ ):
+     """
+     Plots a heatmap overlaid on an image based on the given coordinates and similarity scores.
+
+     :param coor: Array of coordinates (N, 2), where N is the number of patches to place on the heatmap.
+     :param similarity: Array of similarity scores (N,) corresponding to the coordinates; they are mapped to colors via the colormap.
+     :param image_path: Path to the background image on which the heatmap is overlaid. If None, a blank white background is used.
+     :param patch_size: Size of each patch in pixels. Default is (256, 256).
+     :param save_path: Path to save the heatmap image. If None, the heatmap is returned instead of being saved.
+     :param downsize: Factor by which to downsize the image and patches for faster processing. Default is 32.
+     :param cmap: Colormap used to map the similarity scores to colors. Default is 'turbo'.
+     :param smooth: Boolean indicating whether the heatmap should be smoothed. Not implemented in this version.
+     :param boxes: List of boxes in (x, y, w, h) format. If provided, boxes are drawn on the heatmap.
+     :param box_color: Color of the boxes. Default is black ('k').
+     :param box_thickness: Thickness of the box outlines.
+     :param polygons: List of polygons (N, 2) to draw on the heatmap.
+     :param polygons_color: Color of the polygon outlines. Default is black ('k').
+     :param polygons_thickness: Thickness of the polygon outlines.
+     :param image_alpha: Transparency value (0 to 1) for blending the heatmap with the original image. Default is 0.5.
+     :return:
+         - heatmap: The generated heatmap as a numpy array (RGB).
+         - image: The original image, with overlaid polygons if provided.
+     """
+
+     # Read the background image if provided; otherwise build a blank white canvas
+     # just large enough to contain every patch
+     if image_path is not None:
+         image = cv2.imread(image_path)
+     else:
+         height = max(y for x, y in coor) + patch_size[0]
+         width = max(x for x, y in coor) + patch_size[1]
+         image = np.full((height, width, 3), 255, dtype=np.uint8)
+     image_size = (image.shape[0], image.shape[1])  # Get the size of the image
+     coor = [(x // downsize, y // downsize) for x, y in coor]  # Downsize the coordinates for faster processing
+     patch_size = (patch_size[0] // downsize, patch_size[1] // downsize)  # Downsize the patch size
+
+     # Convert similarity scores to colors using the provided colormap
+     cmap = plt.get_cmap(cmap)  # Get the colormap object
+     norm = plt.Normalize(vmin=similarity.min(), vmax=similarity.max())  # Normalize scores to the color range
+     colors = cmap(norm(similarity))  # Convert the normalized scores to RGBA colors
+
+     # Initialize a blank white heatmap the size of the image
+     heatmap = np.ones((image_size[0], image_size[1], 3)) * 255
+
+     # Place the colored patches on the heatmap according to the coordinates and patch size
+     for i in range(len(coor)):
+         x, y = coor[i]
+         w = colors[i][:3] * 255  # RGB color for the patch, scaled from [0, 1] to [0, 255]
+         w = w.astype(np.uint8)  # Convert the color to uint8
+         heatmap[y:y + patch_size[0], x:x + patch_size[1], :] = w  # Place the patch on the heatmap
+
+     # If image_alpha is greater than 0, blend the heatmap with the original image
+     if image_alpha > 0:
+         image = np.array(image)
+
+         # Pad the image if necessary to match the heatmap size
+         if image.shape[0] < heatmap.shape[0]:
+             pad = heatmap.shape[0] - image.shape[0]
+             image = np.pad(image, ((0, pad), (0, 0), (0, 0)), mode='constant', constant_values=255)
+         if image.shape[1] < heatmap.shape[1]:
+             pad = heatmap.shape[1] - image.shape[1]
+             image = np.pad(image, ((0, 0), (0, pad), (0, 0)), mode='constant', constant_values=255)
+
+         # Swap the image from OpenCV's BGR channel order to RGB so it matches the heatmap, then blend
+         image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+         image = image.astype(np.uint8)
+         heatmap = heatmap.astype(np.uint8)
+         heatmap = blend_images(heatmap, image, alpha=image_alpha)  # Blend the heatmap and the image
+
+     # If polygons are provided, draw them on the heatmap and image
+     if polygons is not None:
+         polygons = [poly // downsize for poly in polygons]  # Downsize the polygon coordinates
+         image_polygons = draw_polygon(image, polygons, color=polygons_color, thickness=polygons_thickness)
+         heatmap_polygons = draw_polygon(heatmap, polygons, color=polygons_color, thickness=polygons_thickness)
+         return heatmap_polygons, image_polygons  # Heatmap and image with the polygons drawn on them
+     else:
+         return heatmap, image  # Heatmap and image without polygons
+
+
+ def show_images_side_by_side(image1, image2, title1=None, title2=None):
+     """
+     Displays two images side by side in a single figure.
+
+     :param image1: The first image to display (as a numpy array).
+     :param image2: The second image to display (as a numpy array).
+     :param title1: The title for the first image. Default is None (no title).
+     :param title2: The title for the second image. Default is None (no title).
+     :return: Displays the images side by side.
+     """
+
+     # Create a figure with 2 subplots (1 row, 2 columns) and set the figure size
+     fig, ax = plt.subplots(1, 2, figsize=(15, 8))
+
+     # Display each image on its own subplot
+     ax[0].imshow(image1)
+     ax[1].imshow(image2)
+
+     # Set the titles (if provided)
+     ax[0].set_title(title1)
+     ax[1].set_title(title2)
+
+     # Remove axis labels and ticks for both images to give a cleaner look
+     ax[0].axis('off')
+     ax[1].axis('off')
+
+     # Show the final figure with both images displayed side by side
+     plt.show()
+
+
+ def plot_img_with_annotation(fullres_img, roi_polygon, linewidth, xlim, ylim):
+     """
+     Plots an image with polygons overlaid.
+
+     :param fullres_img: The full-resolution image to display (as a numpy array).
+     :param roi_polygon: A list of polygons, where each polygon is a list of (x, y) coordinate tuples.
+     :param linewidth: The thickness of the lines used to draw the polygons.
+     :param xlim: A tuple (xmin, xmax) defining the x-axis limits for zooming in on a specific region of the image.
+     :param ylim: A tuple (ymin, ymax) defining the y-axis limits for zooming in on a specific region of the image.
+     :return: Displays the image with the ROI polygons overlaid.
+     """
+
+     # Create a new figure with a fixed size for displaying the image and annotations
+     plt.figure(figsize=(10, 10))
+
+     # Display the full-resolution image
+     plt.imshow(fullres_img)
+
+     # Plot each polygon in roi_polygon on top of the image
+     for polygon in roi_polygon:
+         x, y = zip(*polygon)  # Unzip the list of (x, y) tuples into separate x and y coordinate lists
+         plt.plot(x, y, color='black', linewidth=linewidth)
+
+     # Set the axis limits based on the provided tuples
+     plt.xlim(xlim)
+     plt.ylim(ylim)
+
+     # Invert the y-axis to match the typical image display convention (origin at the top-left)
+     plt.gca().invert_yaxis()
+
+     # Turn off the axis for a cleaner display without ticks or labels
+     plt.axis('off')
+
+
+ def plot_annotation_heatmap(st_ad, roi_polygon, s, linewidth, xlim, ylim):
+     """
+     Plots a tissue type annotation heatmap.
+
+     :param st_ad: AnnData object containing coordinates in `obsm['spatial']`
+                   and similarity scores in `obs['bulk_simi']`.
+     :param roi_polygon: A list of polygons, where each polygon is a list of (x, y) coordinate tuples.
+     :param s: The size of the scatter plot markers representing each spatial transcriptomics spot.
+     :param linewidth: The thickness of the lines used to draw the polygons.
+     :param xlim: A tuple (xmin, xmax) defining the x-axis limits for zooming in on a specific region of the image.
+     :param ylim: A tuple (ymin, ymax) defining the y-axis limits for zooming in on a specific region of the image.
+     :return: Displays the heatmap with the polygons overlaid.
+     """
+
+     # Create a new figure with a fixed size for displaying the heatmap and annotations
+     plt.figure(figsize=(10, 10))
+
+     # Scatter plot of the spatial transcriptomics spots, colored by their 'bulk_simi' similarity scores
+     plt.scatter(
+         st_ad.obsm['spatial'][:, 0], st_ad.obsm['spatial'][:, 1],  # x and y coordinates
+         c=st_ad.obs['bulk_simi'],  # Color values based on 'bulk_simi'
+         s=s,                       # Size of each marker
+         vmin=0.1, vmax=0.95,       # Range for the color normalization
+         cmap='turbo'               # Use the 'turbo' colormap for the heatmap
+     )
+
+     # Plot each polygon in roi_polygon on top of the heatmap
+     for polygon in roi_polygon:
+         x, y = zip(*polygon)  # Unzip the list of (x, y) tuples into separate x and y coordinate lists
+         plt.plot(x, y, color='black', linewidth=linewidth)
+
+     # Set the axis limits based on the provided tuples
+     plt.xlim(xlim)
+     plt.ylim(ylim)
+
+     # Invert the y-axis to match the typical image display convention (origin at the top-left)
+     plt.gca().invert_yaxis()
+
+     # Turn off the axis for a cleaner display without ticks or labels
+     plt.axis('off')
+
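The hex branch of `color_string_to_rgb` reduces to slicing the string into three byte pairs and parsing each as base-16. A minimal stdlib-only sketch of that parsing step (the `hex_to_rgb` helper name is illustrative, not part of loki):

```python
def hex_to_rgb(color_string):
    """Parse '#rrggbb' (or bare 'rrggbb') into an (r, g, b) tuple of 0-255 ints."""
    color_string = color_string.lstrip('#')
    # Slice the six hex digits into three two-character pairs and parse base-16
    return tuple(int(color_string[i:i + 2], 16) for i in (0, 2, 4))

print(hex_to_rgb('#ff0000'))  # (255, 0, 0)
print(hex_to_rgb('00ff7f'))   # (0, 255, 127)
```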
src/build/lib/loki/plotting.py ADDED
@@ -0,0 +1,435 @@
1
+ import matplotlib.pyplot as plt
2
+ from pathlib import Path
3
+ import json
4
+ import cv2
5
+ from matplotlib import cm
6
+ import pandas as pd
7
+ import numpy as np
8
+ from tqdm import tqdm
9
+
10
+
11
+
12
+ def plot_alignment(ad_tar_coor, ad_src_coor, homo_coor, pca_hex_comb, tar_features, shift=300, s=0.8, boundary_line=True):
13
+ """
14
+ Plots the target coordinates and alignment of source coordinates.
15
+
16
+ :param ad_tar_coor: Numpy array of target coordinates to be plotted in the first subplot.
17
+ :param ad_src_coor: Numpy array of source coordinates to be plotted in the second subplot.
18
+ :param homo_coor: Numpy array of alignment of source coordinates to be plotted in the third subplot.
19
+ :param pca_hex_comb: Color values (e.g., PCA or hex values) for plotting the coordinates.
20
+ :param tar_features: Feature matrix for the target, used to split color values between target and source data.
+ :param shift: Value used to adjust the plot limits around the coordinates for better visualization. Default is 300.
+ :param s: Marker size for the scatter plot points. Default is 0.8.
+ :param boundary_line: Boolean indicating whether to draw boundary lines (horizontal and vertical lines). Default is True.
+ :return: Displays the alignment plot of target, source, and alignment of source coordinates.
+ """
+
+ # Create a figure with three subplots, adjusting size and resolution
+ plt.figure(figsize=(10, 3), dpi=300)
+
+ # First subplot: Plot target coordinates
+ plt.subplot(1, 3, 1)
+ plt.scatter(ad_tar_coor[:, 0], ad_tar_coor[:, 1], marker='o', s=s, c=pca_hex_comb[:len(tar_features.T)])
+ # Set plot limits based on the minimum and maximum target coordinates, with extra padding from 'shift'
+ plt.xlim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+ plt.ylim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+
+ # Second subplot: Plot source coordinates
+ plt.subplot(1, 3, 2)
+ plt.scatter(ad_src_coor[:, 0], ad_src_coor[:, 1], marker='o', s=s, c=pca_hex_comb[len(tar_features.T):])
+ # Ensure consistent plot limits across subplots by using the same limits as the target coordinates
+ plt.xlim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+ plt.ylim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+
+ # Third subplot: Plot alignment of source coordinates
+ plt.subplot(1, 3, 3)
+ plt.scatter(homo_coor[:, 0], homo_coor[:, 1], marker='o', s=s, c=pca_hex_comb[len(tar_features.T):])
+ # Maintain the same plot limits across all subplots for a uniform comparison
+ plt.xlim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+ plt.ylim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
+
+ # Optionally draw boundary lines at the minimum x and y values of the target coordinates
+ if boundary_line:
+ plt.axvline(x=ad_tar_coor[:, 0].min(), color='black') # Vertical boundary line at the minimum x of target coordinates
+ plt.axhline(y=ad_tar_coor[:, 1].min(), color='black') # Horizontal boundary line at the minimum y of target coordinates
+
+ # Remove the axis labels and ticks from all subplots for a cleaner appearance
+ plt.axis('off')
+
+ # Display the plot
+ plt.show()
+
+
+
+ def plot_alignment_with_img(ad_tar_coor, ad_src_coor, homo_coor, tar_img, src_img, aligned_image, pca_hex_comb, tar_features):
+ """
+ Plots the target coordinates and alignment of source coordinates with their respective images in the background.
+
+ :param ad_tar_coor: Numpy array of target coordinates to be plotted in the first and third subplots.
+ :param ad_src_coor: Numpy array of source coordinates to be plotted in the second subplot.
+ :param homo_coor: Numpy array of alignment of source coordinates to be plotted in the third subplot.
+ :param tar_img: Image associated with the target coordinates, used as the background in the first subplot.
+ :param src_img: Image associated with the source coordinates, used as the background in the second subplot.
+ :param aligned_image: Image associated with the aligned coordinates, used as the background in the third subplot.
+ :param pca_hex_comb: Color values (e.g., PCA or hex values) for plotting the coordinates.
+ :param tar_features: Feature matrix for the target, used to split color values between target and source data.
+ :return: Displays the alignment plot of target, source, and alignment of source coordinates with their associated images.
+ """
+
+ # Create a figure with three subplots and set the size and resolution
+ plt.figure(figsize=(10, 8), dpi=150)
+
+ # First subplot: Plot target coordinates with the target image as the background
+ plt.subplot(1, 3, 1)
+ # Scatter plot for the target coordinates with transparency and small marker size
+ plt.scatter(ad_tar_coor[:, 0], ad_tar_coor[:, 1], marker='o', alpha=0.8, s=1, c=pca_hex_comb[:len(tar_features.T)])
+ # Overlay the target image with some transparency (alpha = 0.3)
+ plt.imshow(tar_img, origin='lower', alpha=0.3)
+
+ # Second subplot: Plot source coordinates with the source image as the background
+ plt.subplot(1, 3, 2)
+ # Scatter plot for the source coordinates with transparency and small marker size
+ plt.scatter(ad_src_coor[:, 0], ad_src_coor[:, 1], marker='o', alpha=0.8, s=1, c=pca_hex_comb[len(tar_features.T):])
+ # Overlay the source image with some transparency (alpha = 0.3)
+ plt.imshow(src_img, origin='lower', alpha=0.3)
+
+ # Third subplot: Plot both target and alignment of source coordinates with the aligned image as the background
+ plt.subplot(1, 3, 3)
+ # Scatter plot for the target coordinates with lower opacity (alpha = 0.2)
+ plt.scatter(ad_tar_coor[:, 0], ad_tar_coor[:, 1], marker='o', alpha=0.2, s=1, c=pca_hex_comb[:len(tar_features.T)])
+ # Scatter plot for the homologous coordinates with a '+' marker and the same color mapping
+ plt.scatter(homo_coor[:, 0], homo_coor[:, 1], marker='+', s=1, c=pca_hex_comb[len(tar_features.T):])
+ # Overlay the aligned image with some transparency (alpha = 0.3)
+ plt.imshow(aligned_image, origin='lower', alpha=0.3)
+
+ # Turn off the axis for all subplots to give a cleaner visual output
+ plt.axis('off')
+
+ # Display the plots
+ plt.show()
+
+
+
+ def draw_polygon(image, polygon, color='k', thickness=2):
+ """
+ Draws one or more polygons on the given image.
+
+ :param image: The image on which to draw the polygons (as a numpy array).
+ :param polygon: A list of polygons, where each polygon is a list of (x, y) coordinate tuples.
+ :param color: A string or list of strings representing the color(s) for each polygon.
+ If a single color is provided, it will be applied to all polygons. Default is 'k' (black).
+ :param thickness: An integer or a list of integers representing the thickness of the polygon borders.
+ If a single value is provided, it will be applied to all polygons. Default is 2.
+
+ :return: The image with the polygons drawn on it.
+ """
+
+ # If the provided `color` is a single value (string), convert it to a list of the same color for each polygon
+ if not isinstance(color, list):
+ color = [color] * len(polygon) # Create a list where each polygon gets the same color
+
+ # Loop through each polygon in the list, along with its corresponding color
+ for i, poly in enumerate(polygon):
+ # Get the color for the current polygon
+ c = color[i]
+
+ # Convert the color from a string format (e.g., 'k' or '#ff0000') to an RGB tuple
+ c = color_string_to_rgb(c)
+
+ # Get the thickness value for the current polygon (if a list is provided, use the corresponding value)
+ t = thickness[i] if isinstance(thickness, list) else thickness
+
+ # Convert the polygon coordinates to a numpy array of integers
+ poly = np.array(poly, np.int32)
+
+ # Reshape the polygon array to match OpenCV's expected input format: (number of points, 1, 2)
+ poly = poly.reshape((-1, 1, 2))
+
+ # Draw the polygon on the image using OpenCV's `cv2.polylines` function
+ # `isClosed=True` indicates that the polygon should be closed (start and end points are connected)
+ image = cv2.polylines(image, [poly], isClosed=True, color=c, thickness=t)
+
+ return image
+
+
+
+ def blend_images(image1, image2, alpha=0.5):
+ """
+ Blends two images together.
+
+ :param image1: Background image, a numpy array of shape (H, W, 3), where H is height, W is width, and 3 represents the RGB color channels.
+ :param image2: Foreground image, a numpy array of shape (H, W, 3), same dimensions as image1.
+ :param alpha: Blending factor, a float between 0 and 1. The value of alpha determines the weight of image1 in the blend,
+ where 0 means only image2 is shown, and 1 means only image1 is shown. Default is 0.5 (equal blending).
+
+ :return: A blended image, where each pixel is a weighted combination of the corresponding pixels from image1 and image2.
+ The blending is computed as: `blended = alpha * image1 + (1 - alpha) * image2`.
+ """
+
+ # Use cv2.addWeighted to blend the two images.
+ # The first image (image1) is weighted by 'alpha', and the second image (image2) is weighted by '1 - alpha'.
+ blended = cv2.addWeighted(image1, alpha, image2, 1 - alpha, 0)
+
+ # Return the resulting blended image.
+ return blended
+
+
+
+ def color_string_to_rgb(color_string):
+ """
+ Converts a color string to an RGB tuple.
+
+ :param color_string: A string representing the color. This can be in hexadecimal form (e.g., '#ff0000') or
+ a shorthand character for basic colors (e.g., 'k' for black, 'r' for red, etc.).
+ :return:
+ A tuple (r, g, b) representing the RGB values of the color, where each value is an integer between 0 and 255.
+ :raises:
+ ValueError: If the color string is not recognized.
+ """
+
+ # Remove any spaces in the color string
+ color_string = color_string.replace(' ', '')
+
+ # If the string starts with a '#', it's a hexadecimal color, so we remove the '#'
+ if color_string.startswith('#'):
+ color_string = color_string[1:]
+ else:
+ # Handle shorthand single-letter color codes by converting them to hex values
+ # 'k' -> black, 'r' -> red, 'g' -> green, 'b' -> blue, 'w' -> white
+ if color_string == 'k': # Black
+ color_string = '000000'
+ elif color_string == 'r': # Red
+ color_string = 'ff0000'
+ elif color_string == 'g': # Green
+ color_string = '00ff00'
+ elif color_string == 'b': # Blue
+ color_string = '0000ff'
+ elif color_string == 'w': # White
+ color_string = 'ffffff'
+ else:
+ # Raise an error if the color string is not recognized
+ raise ValueError(f"Unknown color string {color_string}")
+
+ # Convert the first two characters to the red (R) value
+ r = int(color_string[:2], 16)
+
+ # Convert the next two characters to the green (G) value
+ g = int(color_string[2:4], 16)
+
+ # Convert the last two characters to the blue (B) value
+ b = int(color_string[4:], 16)
+
+ # Return the RGB values as a tuple
+ return (r, g, b)
+
+
+
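The shorthand-to-RGB conversion that `draw_polygon` relies on can be exercised with the self-contained sketch below. It is re-implemented here so the snippet runs on its own; the dict-based lookup is an equivalent rewrite for brevity, not the code above.

```python
def color_string_to_rgb(color_string):
    """Convert '#rrggbb' hex strings or single-letter shorthands to an (r, g, b) tuple."""
    shorthands = {'k': '000000', 'r': 'ff0000', 'g': '00ff00',
                  'b': '0000ff', 'w': 'ffffff'}
    s = color_string.replace(' ', '')
    if s.startswith('#'):
        s = s[1:]                      # strip the leading '#'
    elif s in shorthands:
        s = shorthands[s]              # expand shorthand to a hex string
    else:
        raise ValueError(f"Unknown color string {color_string}")
    # Parse each two-character hex pair into an integer channel value
    return (int(s[:2], 16), int(s[2:4], 16), int(s[4:], 16))

print(color_string_to_rgb('r'))        # (255, 0, 0)
print(color_string_to_rgb('#336699'))  # (51, 102, 153)
```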
+ def plot_heatmap(
+ coor,
+ similarity,
+ image_path=None,
+ patch_size=(256, 256),
+ save_path=None,
+ downsize=32,
+ cmap='turbo',
+ smooth=False,
+ boxes=None,
+ box_color='k',
+ box_thickness=2,
+ polygons=None,
+ polygons_color='k',
+ polygons_thickness=2,
+ image_alpha=0.5
+ ):
+ """
+ Plots a heatmap overlaid on an image based on given coordinates and similarity scores.
+
+ :param coor: Array of coordinates (N, 2) where N is the number of patches to place on the heatmap.
+ :param similarity: Array of similarity scores (N,) corresponding to the coordinates. These scores are mapped to colors using a colormap.
+ :param image_path: Path to the background image on which the heatmap will be overlaid.
+ :param patch_size: Size of each patch in pixels (default is 256x256).
+ :param save_path: Path to save the heatmap image. If None, the heatmap is returned instead of being saved.
+ :param downsize: Factor to downsize the image and patches for faster processing. Default is 32.
+ :param cmap: Colormap to map the similarity scores to colors. Default is 'turbo'.
+ :param smooth: Boolean to indicate if the heatmap should be smoothed. Not implemented in this version.
+ :param boxes: List of boxes in (x, y, w, h) format. If provided, boxes will be drawn on the heatmap.
+ :param box_color: Color of the boxes. Default is black ('k').
+ :param box_thickness: Thickness of the box outlines.
+ :param polygons: List of polygons (N, 2) to draw on the heatmap.
+ :param polygons_color: Color of the polygon outlines. Default is black ('k').
+ :param polygons_thickness: Thickness of the polygon outlines.
+ :param image_alpha: Transparency value (0 to 1) for blending the heatmap with the original image. Default is 0.5.
+
+ :return:
+ - heatmap: The generated heatmap as a numpy array (RGB).
+ - image: The original image with overlaid polygons if provided.
+ """
+
+ # Read the background image
+ image = cv2.imread(image_path)
+ image_size = (image.shape[0], image.shape[1]) # Get the size of the image
+ coor = [(x // downsize, y // downsize) for x, y in coor] # Downsize the coordinates for faster processing
+ patch_size = (patch_size[0] // downsize, patch_size[1] // downsize) # Downsize the patch size
+
+ # Convert similarity scores to colors using the provided colormap
+ cmap = plt.get_cmap(cmap) # Get the colormap object
+ norm = plt.Normalize(vmin=similarity.min(), vmax=similarity.max()) # Normalize scores to map to the color range
+ colors = cmap(norm(similarity)) # Convert the normalized scores to RGB colors
+
+ # Initialize a blank white heatmap the size of the image
+ heatmap = np.ones((image_size[0], image_size[1], 3)) * 255 # Start with a white background
+
+ # Place the colored patches on the heatmap according to the coordinates and patch size
+ for i in range(len(coor)):
+ x, y = coor[i]
+ w = colors[i][:3] * 255 # Get the RGB color for the patch, scaling from [0, 1] to [0, 255]
+ w = w.astype(np.uint8) # Convert the color to uint8
+ heatmap[y:y + patch_size[0], x:x + patch_size[1], :] = w # Place the patch on the heatmap
+
+ # If the image_alpha is greater than 0, blend the heatmap with the original image
+ if image_alpha > 0:
+ image = np.array(image)
+
+ # Pad the image if necessary to match the heatmap size
+ if image.shape[0] < heatmap.shape[0]:
+ pad = heatmap.shape[0] - image.shape[0]
+ image = np.pad(image, ((0, pad), (0, 0), (0, 0)), mode='constant', constant_values=255)
+ if image.shape[1] < heatmap.shape[1]:
+ pad = heatmap.shape[1] - image.shape[1]
+ image = np.pad(image, ((0, 0), (0, pad), (0, 0)), mode='constant', constant_values=255)
+
+ # Swap the channel order (cv2.imread loads BGR) so the image matches the RGB heatmap, then blend
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ image = image.astype(np.uint8)
+ heatmap = heatmap.astype(np.uint8)
+ heatmap = blend_images(heatmap, image, alpha=image_alpha) # Blend the heatmap and the image
+
+ # If polygons are provided, draw them on the heatmap and image
+ if polygons is not None:
+ polygons = [poly // downsize for poly in polygons] # Downsize the polygon coordinates
+ image_polygons = draw_polygon(image, polygons, color=polygons_color, thickness=polygons_thickness) # Draw polygons on the original image
+ heatmap_polygons = draw_polygon(heatmap, polygons, color=polygons_color, thickness=polygons_thickness) # Draw polygons on the heatmap
+
+ return heatmap_polygons, image_polygons # Return the heatmap and image with polygons drawn on them
+ else:
+ return heatmap, image # Return the heatmap and image
+
+
+
+ def show_images_side_by_side(image1, image2, title1=None, title2=None):
+ """
+ Displays two images side by side in a single figure.
+
+ :param image1: The first image to display (as a numpy array).
+ :param image2: The second image to display (as a numpy array).
+ :param title1: The title for the first image. Default is None (no title).
+ :param title2: The title for the second image. Default is None (no title).
+ :return: Displays the images side by side.
+ """
+
+ # Create a figure with 2 subplots (1 row, 2 columns), and set the figure size
+ fig, ax = plt.subplots(1, 2, figsize=(15, 8))
+
+ # Display the first image on the first subplot
+ ax[0].imshow(image1)
+
+ # Display the second image on the second subplot
+ ax[1].imshow(image2)
+
+ # Set the title for the first image (if provided)
+ ax[0].set_title(title1)
+
+ # Set the title for the second image (if provided)
+ ax[1].set_title(title2)
+
+ # Remove axis labels and ticks for both images to give a cleaner look
+ ax[0].axis('off')
+ ax[1].axis('off')
+
+ # Show the final figure with both images displayed side by side
+ plt.show()
+
+
+
+ def plot_img_with_annotation(fullres_img, roi_polygon, linewidth, xlim, ylim):
+ """
+ Plots an image with ROI polygons overlaid.
+
+ :param fullres_img: The full-resolution image to display (as a numpy array).
+ :param roi_polygon: A list of polygons, where each polygon is a list of (x, y) coordinate tuples.
+ :param linewidth: The thickness of the lines used to draw the polygons.
+ :param xlim: A tuple (xmin, xmax) defining the x-axis limits for zooming in on a specific region of the image.
+ :param ylim: A tuple (ymin, ymax) defining the y-axis limits for zooming in on a specific region of the image.
+ :return: Displays the image with ROI polygons overlaid.
+ """
+
+ # Create a new figure with a fixed size for displaying the image and annotations
+ plt.figure(figsize=(10, 10))
+
+ # Display the full-resolution image
+ plt.imshow(fullres_img)
+
+ # Loop through each polygon in roi_polygon and plot them on the image
+ for polygon in roi_polygon:
+ x, y = zip(*polygon) # Unzip the list of (x, y) tuples into separate x and y coordinate lists
+ plt.plot(x, y, color='black', linewidth=linewidth) # Plot the polygon using the specified linewidth
+
+ # Set the x-axis limits based on the provided tuple (xlim)
+ plt.xlim(xlim)
+
+ # Set the y-axis limits based on the provided tuple (ylim)
+ plt.ylim(ylim)
+
+ # Invert the y-axis to match the typical image display convention (origin at the top-left)
+ plt.gca().invert_yaxis()
+
+ # Turn off the axis to give a cleaner image display without ticks or labels
+ plt.axis('off')
+
+
+
+ def plot_annotation_heatmap(st_ad, roi_polygon, s, linewidth, xlim, ylim):
+ """
+ Plots tissue type annotation heatmap.
+
+ :param st_ad: AnnData object containing coordinates in `obsm['spatial']`
+ and similarity scores in `obs['bulk_simi']`.
+ :param roi_polygon: A list of polygons, where each polygon is a list of (x, y) coordinate tuples.
+ :param s: The size of the scatter plot markers representing each spatial transcriptomics spot.
+ :param linewidth: The thickness of the lines used to draw the polygons.
+ :param xlim: A tuple (xmin, xmax) defining the x-axis limits for zooming in on a specific region of the image.
+ :param ylim: A tuple (ymin, ymax) defining the y-axis limits for zooming in on a specific region of the image.
+ :return: Displays the heatmap with polygons overlaid.
+ """
+
+ # Create a new figure with a fixed size for displaying the heatmap and annotations
+ plt.figure(figsize=(10, 10))
+
+ # Scatter plot for the spatial transcriptomics data.
+ # The 'spatial' coordinates are plotted with color intensity based on 'bulk_simi' values.
+ plt.scatter(
+ st_ad.obsm['spatial'][:, 0], st_ad.obsm['spatial'][:, 1], # x and y coordinates
+ c=st_ad.obs['bulk_simi'], # Color values based on 'bulk_simi'
+ s=s, # Size of each marker
+ vmin=0.1, vmax=0.95, # Set the range for the color normalization
+ cmap='turbo' # Use the 'turbo' colormap for the heatmap
+ )
+
+ # Loop through each polygon in roi_polygon and plot them on the image
+ for polygon in roi_polygon:
+ x, y = zip(*polygon) # Unzip the list of (x, y) tuples into separate x and y coordinate lists
+ plt.plot(x, y, color='black', linewidth=linewidth) # Plot the polygon using the specified linewidth
+
+ # Set the x-axis limits based on the provided tuple (xlim)
+ plt.xlim(xlim)
+
+ # Set the y-axis limits based on the provided tuple (ylim)
+ plt.ylim(ylim)
+
+ # Invert the y-axis to match the typical image display convention (origin at the top-left)
+ plt.gca().invert_yaxis()
+
+ # Turn off the axis to give a cleaner image display without ticks or labels
+ plt.axis('off')
+
+
src/build/lib/loki/predex.py ADDED
@@ -0,0 +1,25 @@
+ import pandas as pd
+
+
+
+ def predict_st_gene_expr(image_text_similarity, train_data):
+ """
+ Predicts ST gene expression from an H&E image.
+
+ :param image_text_similarity: Numpy array of similarities between images and text features (shape: [n_samples, n_genes]).
+ :param train_data: Numpy array or DataFrame of training data used for making predictions (shape: [n_genes, n_shared_genes]).
+ :return: Numpy array or DataFrame containing the predicted gene expression levels for the samples.
+ """
+
+ # Compute the weighted sum of the train_data using image_text_similarity
+ weighted_sum = image_text_similarity @ train_data
+
+ # Compute the normalization factor (sum of the image-text similarities for each sample)
+ weights = image_text_similarity.sum(axis=1, keepdims=True)
+
+ # Normalize the predicted matrix to get weighted gene expression predictions
+ predicted_image_text_matrix = weighted_sum / weights
+
+ return predicted_image_text_matrix
+
+
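A quick sanity check of the weighting scheme in `predict_st_gene_expr`, using made-up toy matrices (all sizes and values below are illustrative only): the prediction is a similarity-weighted average of the reference expression profiles.

```python
import numpy as np

# Toy inputs: 2 image patches scored against 3 reference profiles,
# each reference profile carrying expression for 4 shared genes.
image_text_similarity = np.array([[0.2, 0.3, 0.5],
                                  [0.6, 0.2, 0.2]])
train_data = np.array([[1.0, 0.0, 2.0, 1.0],
                       [0.0, 1.0, 1.0, 0.0],
                       [2.0, 2.0, 0.0, 1.0]])

# Same computation as predict_st_gene_expr: weighted sum of the
# reference profiles, normalized by the total similarity per patch.
weighted_sum = image_text_similarity @ train_data
weights = image_text_similarity.sum(axis=1, keepdims=True)
predicted = weighted_sum / weights

print(predicted.shape)  # (2, 4): one 4-gene expression vector per patch
```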
src/build/lib/loki/preprocess.py ADDED
@@ -0,0 +1,324 @@
+ import scanpy as sc
+ import numpy as np
+ import pandas as pd
+ import json
+ import os
+ import logging
+ from PIL import Image
+
+ # Module-level logger used by the error paths below
+ logger = logging.getLogger(__name__)
+
+
+ def generate_gene_df(ad, house_keeping_genes, todense=True):
+ """
+ Generates a DataFrame with the top 50 genes for each observation in an AnnData object.
+ It removes genes containing '.' or '-' in their names, as well as genes listed in
+ the provided `house_keeping_genes` DataFrame/Series under the 'genesymbol' column.
+
+ :param ad: An AnnData object containing gene expression data.
+ :type ad: anndata.AnnData
+ :param house_keeping_genes: DataFrame or Series with a 'genesymbol' column listing housekeeping genes to exclude.
+ :type house_keeping_genes: pandas.DataFrame or pandas.Series
+ :param todense: Whether to convert the sparse matrix (ad.X) to a dense matrix before creating a DataFrame.
+ :type todense: bool
+ :return: A DataFrame (`top_k_genes_str`) that contains a 'label' column. Each row in 'label' is a string
+ with the top 50 gene names (space-separated) for that observation.
+ :rtype: pandas.DataFrame
+ """
+
+ # Remove genes containing '.' in their names
+ ad = ad[:, ~ad.var.index.str.contains('.', regex=False)]
+ # Remove genes containing '-'
+ ad = ad[:, ~ad.var.index.str.contains('-', regex=False)]
+ # Exclude housekeeping genes
+ ad = ad[:, ~ad.var.index.isin(house_keeping_genes['genesymbol'])]
+
+ # Convert to dense if requested; otherwise use the data as-is
+ if todense:
+ expr = pd.DataFrame(ad.X.todense(), index=ad.obs.index, columns=ad.var.index)
+ else:
+ expr = pd.DataFrame(ad.X, index=ad.obs.index, columns=ad.var.index)
+
+ # For each row (observation), find the top 50 genes with the highest expression
+ top_k_genes = expr.apply(lambda s, n: pd.Series(s.nlargest(n).index), axis=1, n=50)
+
+ # Create a new DataFrame to store the labels (space-separated top gene names)
+ top_k_genes_str = pd.DataFrame()
+ top_k_genes_str['label'] = top_k_genes[top_k_genes.columns].astype(str) \
+ .apply(lambda x: ' '.join(x), axis=1)
+
+ return top_k_genes_str
+
+
+
+ def segment_patches(img_array, coord, patch_dir, height=20, width=20):
+ """
+ Extracts small image patches centered at specified coordinates and saves them as individual PNG files.
+
+ :param img_array: A NumPy array representing the full-resolution image. Shape is expected to be (H, W[, C]).
+ :type img_array: numpy.ndarray
+ :param coord: A pandas DataFrame containing patch center coordinates in columns "pixel_x" and "pixel_y".
+ The index corresponds to spot IDs. Example columns: ["pixel_x", "pixel_y"].
+ :type coord: pandas.DataFrame
+ :param patch_dir: Directory path where the patch images will be saved.
+ :type patch_dir: str
+ :param height: The patch's height in pixels (distance in the y-direction).
+ :type height: int
+ :param width: The patch's width in pixels (distance in the x-direction).
+ :type width: int
+ :return: None. The function saves image patches to `patch_dir` but does not return anything.
+ """
+
+ # Ensure the output directory exists; create it if it doesn't
+ if not os.path.exists(patch_dir):
+ os.makedirs(patch_dir)
+
+ # Extract the overall height and width of the image
+ yrange, xrange = img_array.shape[:2]
+
+ # Iterate through each coordinate in the DataFrame
+ for spot_idx in coord.index:
+ # Retrieve the patch center for the current spot (unpacking order matches how the pixel columns were stored)
+ ycenter, xcenter = coord.loc[spot_idx, ["pixel_x", "pixel_y"]]
+
+ # Compute the top-left (x1, y1) and bottom-right (x2, y2) boundaries of the patch
+ x1 = round(xcenter - width / 2)
+ y1 = round(ycenter - height / 2)
+ x2 = x1 + width
+ y2 = y1 + height
+
+ # Check if the patch boundaries go outside the image
+ if x1 < 0 or y1 < 0 or x2 > xrange or y2 > yrange:
+ print(f"Patch {spot_idx} is out of range and will be skipped.")
+ continue
+
+ # Extract the patch and convert to a PIL Image; cast to uint8 if needed
+ patch_img = Image.fromarray(img_array[y1:y2, x1:x2].astype(np.uint8))
+
+ # Create a filename for the patch image (e.g., "0_hires.png")
+ patch_name = f"{spot_idx}_hires.png"
+ patch_path = os.path.join(patch_dir, patch_name)
+
+ # Save the patch image to disk
+ patch_img.save(patch_path)
+
+
+
+ def read_gct(file_path):
+ """
+ Reads a GCT file, parses its dimensions, and returns the data as a pandas DataFrame.
+
+ :param file_path: The path to the GCT file to be read.
+ :return: A pandas DataFrame containing the GCT data, where the first two columns represent gene names and descriptions,
+ and the subsequent columns contain the expression data.
+ """
+
+ # Open the GCT file for reading
+ with open(file_path, 'r') as file:
+ # Read and ignore the first line (GCT version line)
+ file.readline()
+
+ # Read the second line which contains the dimensions of the data matrix
+ dims = file.readline().strip().split() # Split the dimensions line by whitespace
+ num_rows = int(dims[0]) # Number of data rows (genes)
+ num_cols = int(dims[1]) # Number of data columns (samples + metadata)
+
+ # Read the data starting from the third line, using pandas for tab-delimited data
+ # The first two columns in GCT files are "Name" and "Description" (gene identifiers and annotations)
+ data = pd.read_csv(file, sep='\t', header=0, nrows=num_rows)
+
+ # Return the loaded data as a pandas DataFrame
+ return data
+
+
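The parsing logic of `read_gct` can be exercised on an in-memory buffer; the tiny GCT payload below is fabricated purely for illustration, and the helper mirrors the function above except that it accepts any file-like object instead of a path.

```python
import io
import pandas as pd

# Minimal GCT (v1.2) content: a version line, a dimensions line, then a
# tab-delimited table whose first two columns are Name and Description.
gct_text = (
    "#1.2\n"
    "2\t3\n"
    "Name\tDescription\tS1\tS2\tS3\n"
    "GENE1\tna\t1.0\t2.0\t3.0\n"
    "GENE2\tna\t4.0\t5.0\t6.0\n"
)

def read_gct_from_buffer(buf):
    buf.readline()                       # skip the "#1.2" version line
    dims = buf.readline().strip().split()
    num_rows = int(dims[0])              # number of gene rows declared
    return pd.read_csv(buf, sep="\t", header=0, nrows=num_rows)

df = read_gct_from_buffer(io.StringIO(gct_text))
print(df.shape)  # (2, 5): 2 genes; Name + Description + 3 sample columns
```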
+ def get_library_id(adata):
+ """
+ Retrieves the library ID from the AnnData object, assuming it contains spatial data.
+ The function will return the first library ID found in `adata.uns['spatial']`.
+
+ :param adata: AnnData object containing spatial information in `adata.uns['spatial']`.
+ :return: The first library ID found in `adata.uns['spatial']`.
+ :raises:
+ AssertionError: If 'spatial' is not present in `adata.uns`.
+ Logs an error if no library ID is found.
+ """
+
+ # Check if 'spatial' is present in adata.uns; raises an error if not found
+ assert 'spatial' in adata.uns, "spatial not present in adata.uns"
+
+ # Retrieve the list of library IDs (which are keys in the 'spatial' dictionary)
+ library_ids = adata.uns['spatial'].keys()
+
+ try:
+ # Attempt to return the first library ID (converting the keys object to a list)
+ library_id = list(library_ids)[0]
+ return library_id
+ except IndexError:
+ # If no library IDs exist, log an error message
+ logger.error('No library_id found in adata')
+
+
+
+ def get_scalefactors(adata, library_id=None):
+ """
+ Retrieves the scalefactors from the AnnData object for a given library ID. If no library ID is provided,
+ the function will automatically retrieve the first available library ID.
+
+ :param adata: AnnData object containing spatial data and scalefactors in `adata.uns['spatial']`.
+ :param library_id: The library ID for which the scalefactors are to be retrieved. If not provided, it defaults to the first available ID.
+ :return: A dictionary containing scalefactors for the specified library ID.
+ """
+
+ # If no library_id is provided, retrieve the first available library ID
+ if library_id is None:
+ library_id = get_library_id(adata)
+
+ try:
+ # Attempt to retrieve the scalefactors for the specified library ID
+ scalef = adata.uns['spatial'][library_id]['scalefactors']
+ return scalef
+ except KeyError:
+ # Log an error if the scalefactors or library ID is not found
+ logger.error('scalefactors not found in adata')
+
+
+
+ def get_spot_diameter_in_pixels(adata, library_id=None):
+ """
+ Retrieves the spot diameter in pixels from the AnnData object's scalefactors for a given library ID.
+ If no library ID is provided, the function will automatically retrieve the first available library ID.
+
+ :param adata: AnnData object containing spatial data and scalefactors in `adata.uns['spatial']`.
+ :param library_id: The library ID for which the spot diameter is to be retrieved. If not provided, defaults to the first available ID.
+
+ :return: The spot diameter in full resolution pixels, or None if not found.
+ """
+
+ # Get the scalefactors for the specified or default library ID
+ scalef = get_scalefactors(adata, library_id=library_id)
+
+ try:
+ # Attempt to retrieve the spot diameter in full resolution from the scalefactors
+ spot_diameter = scalef['spot_diameter_fullres']
+ return spot_diameter
+ except TypeError:
+ # Handle case where `scalef` is None or invalid (if get_scalefactors returned None)
+ pass
+ except KeyError:
+ # Log an error if the 'spot_diameter_fullres' key is not found in the scalefactors
+ logger.error('spot_diameter_fullres not found in adata')
+
+
+
+ def prepare_data_for_alignment(data_path, scale_type='tissue_hires_scalef'):
+ """
+ Prepares data for alignment by reading an AnnData object and preparing the high-resolution tissue image.
+
+ :param data_path: The path to the AnnData (.h5ad) file containing the Visium data.
+ :param scale_type: The type of scale factor to use (`tissue_hires_scalef` by default).
+
+ :return:
+ - ad: AnnData object containing the spatial transcriptomics data.
+ - ad_coor: Numpy array of scaled spatial coordinates (adjusted for the specified resolution).
+ - img: High-resolution tissue image, normalized to 8-bit unsigned integers.
+
+ :raises:
+ ValueError: If required data (e.g., scale factors, spatial coordinates, or images) is missing.
+ """
+
+ # Load the AnnData object from the specified file path
+ ad = sc.read_h5ad(data_path)
+
+ # Ensure the variable (gene) names are unique to avoid potential conflicts
+ ad.var_names_make_unique()
+
+ try:
+ # Retrieve the specified scale factor for spatial coordinates
+ scalef = get_scalefactors(ad)[scale_type]
+ except KeyError:
+ raise ValueError(f"Scale factor '{scale_type}' not found in ad.uns['spatial']")
+
+ # Scale the spatial coordinates using the specified scale factor
+ try:
+ ad_coor = np.array(ad.obsm['spatial']) * scalef
+ except KeyError:
+ raise ValueError("Spatial coordinates not found in ad.obsm['spatial']")
+
+ # Retrieve the high-resolution tissue image
+ try:
+ img = ad.uns['spatial'][get_library_id(ad)]['images']['hires']
+ except KeyError:
+ raise ValueError("High-resolution image not found in ad.uns['spatial']")
+
+ # If the image values are normalized to [0, 1], convert to 8-bit format for compatibility
+ if img.max() < 1.1:
+ img = (img * 255).astype('uint8')
+
+ return ad, ad_coor, img
+
+
+
260
+ def load_data_for_annotation(st_data_path, json_path, in_tissue=True):
261
+ """
262
+ Loads spatial transcriptomics (ST) data from an .h5ad file and prepares it for annotation.
263
+
264
+ :param st_data_path: Path to the spatial transcriptomics (.h5ad) data file.
+ :param json_path: Path to the JSON file containing the region of interest (ROI) polygon.
266
+ :param in_tissue: Boolean flag to filter the data to include only spots that are in tissue. Default is True.
267
+
268
+ :return:
269
+ - st_ad: AnnData object containing the spatial transcriptomics data, with spatial coordinates in `obs`.
270
+ - library_id: The library ID associated with the spatial data.
271
+ - roi_polygon: Region of interest polygon loaded from a JSON file for further annotation or analysis.
272
+ """
273
+
274
+ # Load the spatial transcriptomics data into an AnnData object
275
+ st_ad = sc.read_h5ad(st_data_path)
276
+
277
+ # Optionally filter the data to include only spots that are within the tissue
278
+ if in_tissue:
279
+ st_ad = st_ad[st_ad.obs['in_tissue'] == 1]
280
+
281
+ # Initialize pixel coordinates for spatial information
282
+ st_ad.obs[["pixel_y", "pixel_x"]] = None # Ensure the columns exist
283
+ st_ad.obs[["pixel_y", "pixel_x"]] = st_ad.obsm['spatial'] # Copy spatial coordinates into obs
284
+
285
+ # Retrieve the library ID associated with the spatial data
286
+ library_id = get_library_id(st_ad)
287
+
288
+ # Load the region of interest (ROI) polygon from a JSON file
289
+ with open(json_path) as f:
290
+ roi_polygon = json.load(f)
291
+
292
+ return st_ad, library_id, roi_polygon
293
+
294
+
295
+
296
+ def read_polygons(file_path, slide_id):
297
+ """
298
+ Reads polygon data from a JSON file for a specific slide ID, extracting coordinates, colors, and thickness.
299
+
300
+ :param file_path: Path to the JSON file containing polygon configurations.
301
+ :param slide_id: Identifier for the specific slide whose polygon data is to be extracted.
302
+ :return:
303
+ - polygons: A list of numpy arrays, where each array contains the coordinates of a polygon.
304
+ - polygon_colors: A list of color values corresponding to each polygon.
305
+ - polygon_thickness: A list of thickness values for each polygon's border.
306
+ """
307
+
308
+ # Open the JSON file and load the polygon configurations into a Python dictionary
309
+ with open(file_path, 'r') as f:
310
+ polygons_configs = json.load(f)
311
+
312
+ # Check if the given slide_id exists in the polygon configurations
313
+ if slide_id not in polygons_configs:
314
+ return None, None, None # If slide_id is not found, return None for all outputs
315
+
316
+ # Extract the polygon coordinates, colors, and thicknesses for the given slide_id
317
+ polygons = [np.array(poly['coords']) for poly in polygons_configs[slide_id]] # Convert polygon coordinates to numpy arrays
318
+ polygon_colors = [poly['color'] for poly in polygons_configs[slide_id]] # Extract the color for each polygon
319
+ polygon_thickness = [poly['thickness'] for poly in polygons_configs[slide_id]] # Extract the thickness for each polygon
320
+
321
+ # Return the polygons, their colors, and their thicknesses
322
+ return polygons, polygon_colors, polygon_thickness
323
+
324
+
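The JSON layout `read_polygons` expects is a mapping from slide ID to a list of records with `coords`, `color`, and `thickness` keys. A self-contained round trip with an illustrative config (slide ID and values are made up):

```python
import json
import os
import tempfile
import numpy as np

# Illustrative config in the layout read_polygons expects.
config = {
    "slide_A": [
        {"coords": [[0, 0], [10, 0], [10, 10]], "color": [255, 0, 0], "thickness": 2},
        {"coords": [[5, 5], [6, 5], [6, 6], [5, 6]], "color": [0, 255, 0], "thickness": 1},
    ]
}

path = os.path.join(tempfile.mkdtemp(), "polygons.json")
with open(path, "w") as f:
    json.dump(config, f)

# Same extraction logic as read_polygons, inlined for the demo
with open(path) as f:
    cfg = json.load(f)

polygons = [np.array(p["coords"]) for p in cfg["slide_A"]]
colors = [p["color"] for p in cfg["slide_A"]]
print(len(polygons), polygons[0].shape, colors[1])  # 2 (3, 2) [0, 255, 0]
```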
src/build/lib/loki/retrieve.py ADDED
@@ -0,0 +1,28 @@
1
+ import torch
2
+
3
+
4
+
5
+ def retrieve_st_by_image(image_embeddings, all_text_embeddings, dataframe, k=3):
6
+ """
7
+ Retrieves the top-k most similar ST samples, ranked by the dot-product similarity between the query image embedding and the ST embeddings.
8
+
9
+ :param image_embeddings: A numpy array or torch tensor containing image embeddings (shape: [1, embedding_dim]).
10
+ :param all_text_embeddings: A numpy array or torch tensor containing ST embeddings (shape: [n_samples, embedding_dim]).
11
+ :param dataframe: A pandas DataFrame containing information about the ST samples, specifically the image indices in the 'img_idx' column.
12
+ :param k: The number of top similar samples to retrieve. Default is 3.
13
+ :return: A list of the filenames or indices corresponding to the top-k similar samples.
14
+ """
15
+
16
+ # Compute the dot product (similarity) between the image embeddings and all ST embeddings
17
+ dot_similarity = image_embeddings @ all_text_embeddings.T
18
+
19
+ # Retrieve the top-k most similar samples by similarity score (dot product)
20
+ values, indices = torch.topk(dot_similarity.squeeze(0), k)
21
+
22
+ # Extract the image filenames or indices from the DataFrame based on the top-k matches
23
+ image_filenames = dataframe['img_idx'].values
24
+ matches = [image_filenames[idx] for idx in indices]
25
+
26
+ return matches
27
+
28
+
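Since the embeddings fed to this function are L2-normalized, the dot product is cosine similarity. A self-contained run of the function above on toy 2-D embeddings:

```python
import torch
import torch.nn.functional as F
import pandas as pd

def retrieve_st_by_image(image_embeddings, all_text_embeddings, dataframe, k=3):
    # Dot product of L2-normalized embeddings == cosine similarity
    dot_similarity = image_embeddings @ all_text_embeddings.T
    values, indices = torch.topk(dot_similarity.squeeze(0), k)
    image_filenames = dataframe['img_idx'].values
    return [image_filenames[idx] for idx in indices]

query = F.normalize(torch.tensor([[1.0, 0.0]]), dim=-1)
st = F.normalize(torch.tensor([[1.0, 0.0],
                               [0.0, 1.0],
                               [0.7, 0.7],
                               [-1.0, 0.0]]), dim=-1)
df = pd.DataFrame({'img_idx': ['s0', 's1', 's2', 's3']})
print(retrieve_st_by_image(query, st, df, k=2))  # ['s0', 's2']
```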
src/build/lib/loki/utilities.py ADDED
@@ -0,0 +1,159 @@
1
+ import os
2
+ import numpy as np
3
+ import pandas as pd
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from PIL import Image
7
+ import json
8
+ import cv2
9
+ from sklearn.decomposition import PCA
10
+ from open_clip import create_model_from_pretrained, get_tokenizer
11
+
12
+
13
+
14
+ def load_model(model_path, device):
15
+ model, preprocess = create_model_from_pretrained("coca_ViT-L-14", device=device, pretrained=model_path)
16
+ tokenizer = get_tokenizer('coca_ViT-L-14')
17
+
18
+ return model, preprocess, tokenizer
19
+
20
+
21
+
22
+ def encode_image(model, preprocess, image):
23
+ image_input = torch.stack([preprocess(image)])
24
+ with torch.no_grad():
25
+ image_features = model.encode_image(image_input)
26
+ image_embeddings = F.normalize(image_features, p=2, dim=-1)
27
+
28
+ return image_embeddings
29
+
30
+
31
+
32
+ def encode_image_patches(model, preprocess, data_dir, img_list):
33
+ image_embeddings = []
34
+ for img_name in img_list:
35
+ image_path = os.path.join(data_dir, 'demo_data', 'patch', img_name)
36
+ image = Image.open(image_path)
37
+ image_features = encode_image(model, preprocess, image)
38
+ image_embeddings.append(image_features)
39
+ image_embeddings = torch.from_numpy(np.array(image_embeddings))
40
+ image_embeddings = F.normalize(image_embeddings, p=2, dim=-1)
41
+ return image_embeddings
42
+
43
+
44
+
45
+ def encode_text(model, tokenizer, text):
46
+ text_input = tokenizer(text)
47
+ with torch.no_grad():
48
+ text_features = model.encode_text(text_input)
49
+ text_embeddings = F.normalize(text_features, p=2, dim=-1)
50
+
51
+ return text_embeddings
52
+
53
+
54
+
55
+ def encode_text_df(model, tokenizer, df, col_name):
56
+ text_embeddings = []
57
+ for idx in df.index:
58
+ text = df[df.index==idx][col_name].iloc[0]
59
+ text_features = encode_text(model, tokenizer, text)
60
+ text_embeddings.append(text_features)
61
+ text_embeddings = torch.from_numpy(np.array(text_embeddings))
62
+ text_embeddings = F.normalize(text_embeddings, p=2, dim=-1)
63
+ return text_embeddings
64
+
65
+
66
+
67
+ def get_pca_by_fit(tar_features, src_features):
68
+ """
69
+ Applies PCA to target features and transforms both target and source features using the fitted PCA model.
70
+ Combines the PCA-transformed features from both target and source datasets and returns the combined data
71
+ along with batch labels indicating the origin of each sample.
72
+
73
+ :param tar_features: Numpy array of target features (samples by features).
74
+ :param src_features: Numpy array of source features (samples by features).
75
+ :return:
76
+ - pca_comb_features: A numpy array containing PCA-transformed target and source features combined.
77
+ - pca_comb_features_batch: A numpy array of batch labels indicating which samples are from target (0) and source (1).
78
+ """
79
+
80
+ pca = PCA(n_components=3)
81
+
82
+ # Fit the PCA model on the target features (transposed to fit on features)
83
+ pca_fit_tar = pca.fit(tar_features.T)
84
+
85
+ # Transform the target and source features using the fitted PCA model
86
+ pca_tar = pca_fit_tar.transform(tar_features.T) # Transform target features
87
+ pca_src = pca_fit_tar.transform(src_features.T) # Transform source features using the same PCA fit
88
+
89
+ # Combine the PCA-transformed target and source features
90
+ pca_comb_features = np.concatenate((pca_tar, pca_src))
91
+
92
+ # Create a batch label array: 0 for target features, 1 for source features
93
+ pca_comb_features_batch = np.array([0] * len(pca_tar) + [1] * len(pca_src))
94
+
95
+ return pca_comb_features, pca_comb_features_batch
96
+
97
+
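A minimal run of the same fit-on-target / project-both pattern on random data (shapes are illustrative; features are stored as features-by-samples, hence the transposes):

```python
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
tar = rng.normal(size=(8, 20))   # 8 features, 20 target samples
src = rng.normal(size=(8, 15))   # 8 features, 15 source samples

pca = PCA(n_components=3).fit(tar.T)    # fit on target samples only
pca_tar = pca.transform(tar.T)
pca_src = pca.transform(src.T)          # project source into the target's PC space

comb = np.concatenate((pca_tar, pca_src))
batch = np.array([0] * len(pca_tar) + [1] * len(pca_src))
print(comb.shape, int(batch.sum()))  # (35, 3) 15
```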
98
+
99
+ def cap_quantile(weight, cap_max=None, cap_min=None):
100
+ """
101
+ Caps the values in the 'weight' array based on the specified quantile thresholds for maximum and minimum values.
102
+ If the quantile thresholds are provided, the function will replace values above or below these thresholds
103
+ with the corresponding quantile values.
104
+
105
+ :param weight: Numpy array of weights to be capped.
106
+ :param cap_max: Quantile threshold for the maximum cap. Values above this quantile will be capped.
107
+ If None, no maximum capping will be applied.
108
+ :param cap_min: Quantile threshold for the minimum cap. Values below this quantile will be capped.
109
+ If None, no minimum capping will be applied.
110
+ :return: Numpy array with the values capped at the specified quantiles.
111
+ """
112
+
113
+ # If a maximum cap is specified, calculate the value at the specified cap_max quantile
114
+ if cap_max is not None:
115
+ cap_max = np.quantile(weight, cap_max) # Get the value at the cap_max quantile
116
+
117
+ # If a minimum cap is specified, calculate the value at the specified cap_min quantile
118
+ if cap_min is not None:
119
+ cap_min = np.quantile(weight, cap_min) # Get the value at the cap_min quantile
120
+
121
+ # Cap the values in 'weight' array to not exceed the maximum cap (cap_max)
122
+ weight = np.minimum(weight, cap_max)
123
+
124
+ # Cap the values in 'weight' array to not go below the minimum cap (cap_min)
125
+ weight = np.maximum(weight, cap_min)
126
+
127
+ return weight
128
+
129
+
130
+
131
+ def read_polygons(file_path, slide_id):
132
+ """
133
+ Reads polygon data from a JSON file for a specific slide ID, extracting coordinates, colors, and thickness.
134
+
135
+ :param file_path: Path to the JSON file containing polygon configurations.
136
+ :param slide_id: Identifier for the specific slide whose polygon data is to be extracted.
137
+ :return:
138
+ - polygons: A list of numpy arrays, where each array contains the coordinates of a polygon.
139
+ - polygon_colors: A list of color values corresponding to each polygon.
140
+ - polygon_thickness: A list of thickness values for each polygon's border.
141
+ """
142
+
143
+ # Open the JSON file and load the polygon configurations into a Python dictionary
144
+ with open(file_path, 'r') as f:
145
+ polygons_configs = json.load(f)
146
+
147
+ # Check if the given slide_id exists in the polygon configurations
148
+ if slide_id not in polygons_configs:
149
+ return None, None, None # If slide_id is not found, return None for all outputs
150
+
151
+ # Extract the polygon coordinates, colors, and thicknesses for the given slide_id
152
+ polygons = [np.array(poly['coords']) for poly in polygons_configs[slide_id]] # Convert polygon coordinates to numpy arrays
153
+ polygon_colors = [poly['color'] for poly in polygons_configs[slide_id]] # Extract the color for each polygon
154
+ polygon_thickness = [poly['thickness'] for poly in polygons_configs[slide_id]] # Extract the thickness for each polygon
155
+
156
+ # Return the polygons, their colors, and their thicknesses
157
+ return polygons, polygon_colors, polygon_thickness
158
+
159
+
src/build/lib/loki/utils.py ADDED
@@ -0,0 +1,278 @@
1
+ import os
2
+ import numpy as np
3
+ import pandas as pd
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from PIL import Image
7
+ import json
8
+ import cv2
9
+ from sklearn.decomposition import PCA
10
+ from open_clip import create_model_from_pretrained, get_tokenizer
11
+
12
+
13
+
14
+ def load_model(model_path, device):
15
+ """
16
+ Loads a pretrained CoCa (CLIP-like) model, along with its preprocessing function and tokenizer,
17
+ using the specified model checkpoint.
18
+
19
+ :param model_path: File path or URL to the pretrained model checkpoint. This is passed to
20
+ `create_model_from_pretrained` as the `pretrained` argument.
21
+ :type model_path: str
22
+ :param device: The device on which to load the model (e.g., 'cpu' or 'cuda').
23
+ :type device: str or torch.device
24
+ :return: A tuple `(model, preprocess, tokenizer)` where:
25
+ - model: The loaded CoCa model.
26
+ - preprocess: A function or transform that preprocesses input data for the model.
27
+ - tokenizer: A tokenizer appropriate for textual input to the model.
28
+ :rtype: (nn.Module, callable, callable)
29
+ """
30
+ # Create the model and its preprocessing transform from the specified checkpoint
31
+ model, preprocess = create_model_from_pretrained(
32
+ "coca_ViT-L-14", device=device, pretrained=model_path
33
+ )
34
+
35
+ # Retrieve a tokenizer compatible with the "coca_ViT-L-14" architecture
36
+ tokenizer = get_tokenizer('coca_ViT-L-14')
37
+
38
+ return model, preprocess, tokenizer
39
+
40
+
41
+
42
+ def encode_image(model, preprocess, image):
43
+ """
44
+ Encodes an image into a normalized feature embedding using the specified model and preprocessing function.
45
+
46
+ :param model: A model object that provides an `encode_image` method (e.g., a CLIP or CoCa model).
47
+ :type model: torch.nn.Module
48
+ :param preprocess: A preprocessing function that transforms the input image into a tensor
49
+ suitable for the model. Typically something returning a PyTorch tensor.
50
+ :type preprocess: callable
51
+ :param image: The input image (PIL Image, NumPy array, or other format supported by `preprocess`).
52
+ :type image: PIL.Image.Image or numpy.ndarray
53
+ :return: A single normalized image embedding as a PyTorch tensor of shape (1, embedding_dim).
54
+ :rtype: torch.Tensor
55
+ """
56
+ # Preprocess the image, then stack to create a batch of size 1
57
+ image_input = torch.stack([preprocess(image)])
58
+
59
+ # Generate the image features without gradient tracking
60
+ with torch.no_grad():
61
+ image_features = model.encode_image(image_input)
62
+
63
+ # Normalize embeddings across the feature dimension (L2 normalization)
64
+ image_embeddings = F.normalize(image_features, p=2, dim=-1)
65
+
66
+ return image_embeddings
67
+
68
+
69
+
70
+ def encode_image_patches(model, preprocess, data_dir, img_list):
71
+ """
72
+ Encodes multiple image patches into normalized feature embeddings using a specified model and preprocess function.
73
+
74
+ :param model: A model object that provides an `encode_image` method (e.g., a CLIP or CoCa model).
75
+ :type model: torch.nn.Module
76
+ :param preprocess: A preprocessing function that transforms the input image into a tensor
77
+ suitable for the model. Typically something returning a PyTorch tensor.
78
+ :type preprocess: callable
79
+ :param data_dir: The base directory containing image data.
80
+ :type data_dir: str
81
+ :param img_list: A list of image filenames (strings). Each filename corresponds to a patch image
82
+ stored in `data_dir/demo_data/patch/`.
83
+ :type img_list: list[str]
84
+ :return: A PyTorch tensor of shape (N, 1, embedding_dim), containing the normalized embeddings
85
+ for each image in `img_list`.
86
+ :rtype: torch.Tensor
87
+ """
88
+
89
+ # Prepare a list to hold each image's feature embedding
90
+ image_embeddings = []
91
+
92
+ # Loop through each image name in the provided list
93
+ for img_name in img_list:
94
+ # Build the path to the patch image and open it
95
+ image_path = os.path.join(data_dir, 'demo_data', 'patch', img_name)
96
+ image = Image.open(image_path)
97
+
98
+ # Encode the image using the model & preprocess; returns shape (1, embedding_dim)
99
+ image_features = encode_image(model, preprocess, image)
100
+
101
+ # Accumulate the feature embeddings in the list
102
+ image_embeddings.append(image_features)
103
+
104
+ # Convert the list of embeddings to a NumPy array, then to a PyTorch tensor
105
+ # Resulting shape will be (N, 1, embedding_dim)
106
+ image_embeddings = torch.from_numpy(np.array(image_embeddings))
107
+
108
+ # Normalize all embeddings across the feature dimension (L2 normalization)
109
+ image_embeddings = F.normalize(image_embeddings, p=2, dim=-1)
110
+
111
+ return image_embeddings
112
+
113
+
114
+
115
+ def encode_text(model, tokenizer, text):
116
+ """
117
+ Encodes text into a normalized feature embedding using a specified model and tokenizer.
118
+
119
+ :param model: A model object that provides an `encode_text` method (e.g., a CLIP-like or CoCa model).
120
+ :type model: torch.nn.Module
121
+ :param tokenizer: A tokenizer function that converts the input text into a format suitable for `model.encode_text`.
122
+ Typically returns token IDs, attention masks, etc. as a torch.Tensor or similar structure.
123
+ :type tokenizer: callable
124
+ :param text: The input text (string or list of strings) to be encoded.
125
+ :type text: str or list[str]
126
+ :return: A PyTorch tensor of shape (batch_size, embedding_dim) containing the L2-normalized text embeddings.
127
+ :rtype: torch.Tensor
128
+ """
129
+
130
+ # Convert text to the appropriate tokenized representation
131
+ text_input = tokenizer(text)
132
+
133
+ # Run the model in no-grad mode (not tracking gradients, saving memory and compute)
134
+ with torch.no_grad():
135
+ text_features = model.encode_text(text_input)
136
+
137
+ # Normalize embeddings to unit length
138
+ text_embeddings = F.normalize(text_features, p=2, dim=-1)
139
+
140
+ return text_embeddings
141
+
142
+
143
+
144
+ def encode_text_df(model, tokenizer, df, col_name):
145
+ """
146
+ Encodes text from a specified column in a pandas DataFrame using the given model and tokenizer,
147
+ returning a PyTorch tensor of normalized text embeddings.
148
+
149
+ :param model: A model object that provides an `encode_text` method (e.g., a CLIP-like or CoCa model).
150
+ :type model: torch.nn.Module
151
+ :param tokenizer: A tokenizer function that converts the input text into a format suitable for `model.encode_text`.
152
+ :type tokenizer: callable
153
+ :param df: A pandas DataFrame from which text will be extracted.
154
+ :type df: pandas.DataFrame
155
+ :param col_name: The name of the column in `df` that contains the text to be encoded.
156
+ :type col_name: str
157
+ :return: A PyTorch tensor containing the L2-normalized text embeddings,
158
+ where the shape is (number_of_rows, embedding_dim).
159
+ :rtype: torch.Tensor
160
+ """
161
+
162
+ # Prepare a list to hold each row's text embedding
163
+ text_embeddings = []
164
+
165
+ # Loop through each index in the DataFrame
166
+ for idx in df.index:
167
+ # Retrieve text from the specified column for the current row
168
+ text = df[df.index == idx][col_name].iloc[0]  # positional access avoids KeyError on non-integer indexes
169
+
170
+ # Encode the text using the provided model and tokenizer
171
+ text_features = encode_text(model, tokenizer, text)
172
+
173
+ # Accumulate the embedding tensor
174
+ text_embeddings.append(text_features)
175
+
176
+ # Stack the list of (1, embedding_dim) tensors into shape (N, 1, embedding_dim) via NumPy, then back to torch
177
+ text_embeddings = torch.from_numpy(np.array(text_embeddings))
178
+
179
+ # Normalize embeddings to unit length across the feature dimension
180
+ text_embeddings = F.normalize(text_embeddings, p=2, dim=-1)
181
+
182
+ return text_embeddings
183
+
184
+
185
+
186
+ def get_pca_by_fit(tar_features, src_features):
187
+ """
188
+ Applies PCA to target features and transforms both target and source features using the fitted PCA model.
189
+ Combines the PCA-transformed features from both target and source datasets and returns the combined data
190
+ along with batch labels indicating the origin of each sample.
191
+
192
+ :param tar_features: Numpy array of target features (samples by features).
193
+ :param src_features: Numpy array of source features (samples by features).
194
+ :return:
195
+ - pca_comb_features: A numpy array containing PCA-transformed target and source features combined.
196
+ - pca_comb_features_batch: A numpy array of batch labels indicating which samples are from target (0) and source (1).
197
+ """
198
+
199
+ pca = PCA(n_components=3)
200
+
201
+ # Fit the PCA model on the target features (transposed to fit on features)
202
+ pca_fit_tar = pca.fit(tar_features.T)
203
+
204
+ # Transform the target and source features using the fitted PCA model
205
+ pca_tar = pca_fit_tar.transform(tar_features.T) # Transform target features
206
+ pca_src = pca_fit_tar.transform(src_features.T) # Transform source features using the same PCA fit
207
+
208
+ # Combine the PCA-transformed target and source features
209
+ pca_comb_features = np.concatenate((pca_tar, pca_src))
210
+
211
+ # Create a batch label array: 0 for target features, 1 for source features
212
+ pca_comb_features_batch = np.array([0] * len(pca_tar) + [1] * len(pca_src))
213
+
214
+ return pca_comb_features, pca_comb_features_batch
215
+
216
+
217
+
218
+ def cap_quantile(weight, cap_max=None, cap_min=None):
219
+ """
220
+ Caps the values in the 'weight' array based on the specified quantile thresholds for maximum and minimum values.
221
+ If the quantile thresholds are provided, the function will replace values above or below these thresholds
222
+ with the corresponding quantile values.
223
+
224
+ :param weight: Numpy array of weights to be capped.
225
+ :param cap_max: Quantile threshold for the maximum cap. Values above this quantile will be capped.
226
+ If None, no maximum capping will be applied.
227
+ :param cap_min: Quantile threshold for the minimum cap. Values below this quantile will be capped.
228
+ If None, no minimum capping will be applied.
229
+ :return: Numpy array with the values capped at the specified quantiles.
230
+ """
231
+
232
+ # If a maximum cap is specified, calculate the value at the specified cap_max quantile
233
+ if cap_max is not None:
234
+ cap_max = np.quantile(weight, cap_max) # Get the value at the cap_max quantile
235
+
236
+ # If a minimum cap is specified, calculate the value at the specified cap_min quantile
237
+ if cap_min is not None:
238
+ cap_min = np.quantile(weight, cap_min) # Get the value at the cap_min quantile
239
+
240
+ # Cap the values in 'weight' array to not exceed the maximum cap (cap_max)
241
+ weight = np.minimum(weight, cap_max)
242
+
243
+ # Cap the values in 'weight' array to not go below the minimum cap (cap_min)
244
+ weight = np.maximum(weight, cap_min)
245
+
246
+ return weight
247
+
248
+
249
+
250
+ def read_polygons(file_path, slide_id):
251
+ """
252
+ Reads polygon data from a JSON file for a specific slide ID, extracting coordinates, colors, and thickness.
253
+
254
+ :param file_path: Path to the JSON file containing polygon configurations.
255
+ :param slide_id: Identifier for the specific slide whose polygon data is to be extracted.
256
+ :return:
257
+ - polygons: A list of numpy arrays, where each array contains the coordinates of a polygon.
258
+ - polygon_colors: A list of color values corresponding to each polygon.
259
+ - polygon_thickness: A list of thickness values for each polygon's border.
260
+ """
261
+
262
+ # Open the JSON file and load the polygon configurations into a Python dictionary
263
+ with open(file_path, 'r') as f:
264
+ polygons_configs = json.load(f)
265
+
266
+ # Check if the given slide_id exists in the polygon configurations
267
+ if slide_id not in polygons_configs:
268
+ return None, None, None # If slide_id is not found, return None for all outputs
269
+
270
+ # Extract the polygon coordinates, colors, and thicknesses for the given slide_id
271
+ polygons = [np.array(poly['coords']) for poly in polygons_configs[slide_id]] # Convert polygon coordinates to numpy arrays
272
+ polygon_colors = [poly['color'] for poly in polygons_configs[slide_id]] # Extract the color for each polygon
273
+ polygon_thickness = [poly['thickness'] for poly in polygons_configs[slide_id]] # Extract the thickness for each polygon
274
+
275
+ # Return the polygons, their colors, and their thicknesses
276
+ return polygons, polygon_colors, polygon_thickness
277
+
278
+
src/dist/loki-0.0.1-py3-none-any.whl ADDED
Binary file (22.2 kB). View file
 
src/dist/loki-0.0.1.tar.gz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98f4615e981aeb895088cb71b1f358a9d6470302043c7cab8bc15396b9cbbe0d
3
+ size 20339
src/loki.egg-info/PKG-INFO ADDED
@@ -0,0 +1,23 @@
1
+ Metadata-Version: 2.1
2
+ Name: loki
3
+ Version: 0.0.1
4
+ Summary: The Loki platform offers 5 core functions: tissue alignment, cell type decomposition, tissue annotation, image-transcriptomics retrieval, and ST gene expression prediction
5
+ Author: Weiqing Chen
6
+ Author-email: wec4005@med.cornell.edu
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.9
11
+ Requires-Dist: anndata==0.10.9
12
+ Requires-Dist: matplotlib==3.9.2
13
+ Requires-Dist: numpy==1.25.0
14
+ Requires-Dist: pandas==2.2.3
15
+ Requires-Dist: opencv-python==4.10.0.84
16
+ Requires-Dist: pycpd==2.0.0
17
+ Requires-Dist: torch==2.3.1
18
+ Requires-Dist: tangram-sc==1.0.4
19
+ Requires-Dist: tqdm==4.66.5
20
+ Requires-Dist: torchvision==0.18.1
21
+ Requires-Dist: open_clip_torch==2.26.1
22
+ Requires-Dist: pillow==10.4.0
23
+ Requires-Dist: ipykernel==6.29.5
src/loki.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,16 @@
1
+ README.md
2
+ setup.py
3
+ loki/__init__.py
4
+ loki/align.py
5
+ loki/annotate.py
6
+ loki/decompose.py
7
+ loki/plot.py
8
+ loki/predex.py
9
+ loki/preprocess.py
10
+ loki/retrieve.py
11
+ loki/utils.py
12
+ loki.egg-info/PKG-INFO
13
+ loki.egg-info/SOURCES.txt
14
+ loki.egg-info/dependency_links.txt
15
+ loki.egg-info/requires.txt
16
+ loki.egg-info/top_level.txt
src/loki.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
1
+
src/loki.egg-info/requires.txt ADDED
@@ -0,0 +1,13 @@
1
+ anndata==0.10.9
2
+ matplotlib==3.9.2
3
+ numpy==1.25.0
4
+ pandas==2.2.3
5
+ opencv-python==4.10.0.84
6
+ pycpd==2.0.0
7
+ torch==2.3.1
8
+ tangram-sc==1.0.4
9
+ tqdm==4.66.5
10
+ torchvision==0.18.1
11
+ open_clip_torch==2.26.1
12
+ pillow==10.4.0
13
+ ipykernel==6.29.5
src/loki.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
+ loki
src/loki/__init__.py ADDED
File without changes
src/loki/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (139 Bytes). View file
 
src/loki/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (146 Bytes). View file
 
src/loki/__pycache__/align.cpython-39.pyc ADDED
Binary file (17.3 kB). View file
 
src/loki/__pycache__/annotate.cpython-39.pyc ADDED
Binary file (2.99 kB). View file
 
src/loki/__pycache__/decompose.cpython-39.pyc ADDED
Binary file (4.72 kB). View file
 
src/loki/__pycache__/deconv.cpython-39.pyc ADDED
Binary file (3.52 kB). View file
 
src/loki/__pycache__/plot.cpython-39.pyc ADDED
Binary file (13.6 kB). View file
 
src/loki/__pycache__/predex.cpython-39.pyc ADDED
Binary file (904 Bytes). View file
 
src/loki/__pycache__/preprocess.cpython-39.pyc ADDED
Binary file (10.8 kB). View file
 
src/loki/__pycache__/retrieve.cpython-39.pyc ADDED
Binary file (1.38 kB). View file
 
src/loki/__pycache__/utils.cpython-39.pyc ADDED
Binary file (9.44 kB). View file
 
src/loki/align.py ADDED
@@ -0,0 +1,568 @@
1
+ import pycpd
2
+ from builtins import super
3
+ import numbers
+ from warnings import warn
4
+ import numpy as np
5
+ import cv2
6
+
7
+ class EMRegistration(object):
8
+ """
9
+ Expectation maximization point cloud registration.
10
+ Adapted from Pure Numpy Implementation of the Coherent Point Drift Algorithm:
11
+ https://github.com/siavashk/pycpd
12
+
13
+
14
+ Attributes
15
+ ----------
16
+ X: numpy array
17
+ NxD array of target points.
18
+
19
+ Y: numpy array
20
+ MxD array of source points.
21
+
22
+ TY: numpy array
23
+ MxD array of transformed source points.
24
+
25
+ sigma2: float (positive)
26
+ Initial variance of the Gaussian mixture model.
27
+
28
+ N: int
29
+ Number of target points.
30
+
31
+ M: int
32
+ Number of source points.
33
+
34
+ D: int
35
+ Dimensionality of source and target points
36
+
37
+ iteration: int
38
+ The current iteration throughout registration.
39
+
40
+ max_iterations: int
41
+ Registration will terminate once the algorithm has taken this
42
+ many iterations.
43
+
44
+ tolerance: float (positive)
45
+ Registration will terminate once the difference between
46
+ consecutive objective function values falls within this tolerance.
47
+
48
+ w: float (between 0 and 1)
49
+ Contribution of the uniform distribution to account for outliers.
50
+ Valid values span 0 (inclusive) and 1 (exclusive).
51
+
52
+ q: float
53
+ The objective function value that represents the misalignment between source
54
+ and target point clouds.
55
+
56
+ diff: float (positive)
57
+ The absolute difference between the current and previous objective function values.
58
+
59
+ P: numpy array
60
+ MxN array of probabilities.
61
+ P[m, n] represents the probability that the m-th source point
62
+ corresponds to the n-th target point.
63
+
64
+ Pt1: numpy array
65
+ Nx1 column array.
66
+ Multiplication result between the transpose of P and a column vector of all 1s.
67
+
68
+ P1: numpy array
69
+ Mx1 column array.
70
+ Multiplication result between P and a column vector of all 1s.
71
+
72
+ Np: float (positive)
73
+ The sum of all elements in P.
74
+
75
+ """
76
+
77
+ def __init__(self, X, Y, sigma2=None, max_iterations=None, tolerance=None, w=None, *args, **kwargs):
78
+ if type(X) is not np.ndarray or X.ndim != 2:
79
+ raise ValueError(
80
"The target point cloud (X) must be a 2D numpy array.")
81
+
82
+ if type(Y) is not np.ndarray or Y.ndim != 2:
83
+ raise ValueError(
84
+ "The source point cloud (Y) must be a 2D numpy array.")
85
+
86
+ if X.shape[1] != Y.shape[1]:
87
+ raise ValueError(
88
+ "Both point clouds need to have the same number of dimensions.")
89
+
90
+ if sigma2 is not None and (not isinstance(sigma2, numbers.Number) or sigma2 <= 0):
91
+ raise ValueError(
92
+ "Expected a positive value for sigma2 instead got: {}".format(sigma2))
93
+
94
+ if max_iterations is not None and (not isinstance(max_iterations, numbers.Number) or max_iterations < 0):
95
+ raise ValueError(
96
+ "Expected a positive integer for max_iterations instead got: {}".format(max_iterations))
97
+ elif isinstance(max_iterations, numbers.Number) and not isinstance(max_iterations, int):
98
+ warn("Received a non-integer value for max_iterations: {}. Casting to integer.".format(max_iterations))
99
+ max_iterations = int(max_iterations)
100
+
101
+ if tolerance is not None and (not isinstance(tolerance, numbers.Number) or tolerance < 0):
102
+ raise ValueError(
103
+ "Expected a positive float for tolerance instead got: {}".format(tolerance))
104
+
105
+ if w is not None and (not isinstance(w, numbers.Number) or w < 0 or w >= 1):
106
+ raise ValueError(
107
+ "Expected a value between 0 (inclusive) and 1 (exclusive) for w instead got: {}".format(w))
108
+
109
+ self.X = X
110
+ self.Y = Y
111
+ self.TY = Y
112
+ self.sigma2 = initialize_sigma2(X, Y) if sigma2 is None else sigma2
113
+ (self.N, self.D) = self.X.shape
114
+ (self.M, _) = self.Y.shape
115
+ self.tolerance = 0.001 if tolerance is None else tolerance
116
+ self.w = 0.0 if w is None else w
117
+ self.max_iterations = 100 if max_iterations is None else max_iterations
118
+ self.iteration = 0
119
+ self.diff = np.inf
120
+ self.q = np.inf
121
+ self.P = np.zeros((self.M, self.N))
122
+ self.Pt1 = np.zeros((self.N, ))
123
+ self.P1 = np.zeros((self.M, ))
124
+ self.PX = np.zeros((self.M, self.D))
125
+ self.Np = 0
126
+
127
+ def register(self, callback=lambda **kwargs: None):
128
+ """
129
+ Perform the EM registration.
130
+
131
+ Attributes
132
+ ----------
133
+ callback: function
134
+ A function that will be called after each iteration.
135
+ Can be used to visualize the registration process.
136
+
137
+ Returns
138
+ -------
139
+ self.TY: numpy array
140
+ MxD array of transformed source points.
141
+
142
+ registration_parameters:
143
+ Registration parameters; their contents depend on the registration method used.
144
+ """
145
+ self.transform_point_cloud()
146
+ while self.iteration < self.max_iterations and self.diff > self.tolerance:
147
+ self.iterate()
148
+ if callable(callback):
149
+ kwargs = {'iteration': self.iteration,
150
+ 'error': self.q, 'X': self.X, 'Y': self.TY}
151
+ callback(**kwargs)
152
+
153
+ return self.TY, self.get_registration_parameters()
154
+
155
+ def get_registration_parameters(self):
156
+ """
157
+ Placeholder for child classes.
158
+ """
159
+ raise NotImplementedError(
160
+ "Registration parameters should be defined in child classes.")
161
+
162
+ def update_transform(self):
163
+ """
164
+ Placeholder for child classes.
165
+ """
166
+ raise NotImplementedError(
167
+ "Updating transform parameters should be defined in child classes.")
168
+
169
+ def transform_point_cloud(self):
170
+ """
171
+ Placeholder for child classes.
172
+ """
173
+ raise NotImplementedError(
174
+ "Updating the source point cloud should be defined in child classes.")
175
+
176
+ def update_variance(self):
177
+ """
178
+ Placeholder for child classes.
179
+ """
180
+ raise NotImplementedError(
181
+ "Updating the Gaussian variance for the mixture model should be defined in child classes.")
182
+
183
+ def iterate(self):
184
+ """
185
+ Perform one iteration of the EM algorithm.
186
+ """
187
+ self.expectation()
188
+ self.maximization()
189
+ self.iteration += 1
190
+
191
+ def expectation(self):
192
+ """
193
+ Compute the expectation step of the EM algorithm.
194
+ """
195
+ P = np.sum((self.X[None, :, :] - self.TY[:, None, :])**2, axis=2) # (M, N)
196
+ P = np.exp(-P/(2*self.sigma2))
197
+ c = (2*np.pi*self.sigma2)**(self.D/2)*self.w/(1. - self.w)*self.M/self.N
198
+
199
+ den = np.sum(P, axis = 0, keepdims = True) # (1, N)
200
+ den = np.clip(den, np.finfo(self.X.dtype).eps, None) + c
201
+
202
+ self.P = np.divide(P, den)
203
+ self.Pt1 = np.sum(self.P, axis=0)
204
+ self.P1 = np.sum(self.P, axis=1)
205
+ self.Np = np.sum(self.P1)
206
+ self.PX = np.matmul(self.P, self.X)
207
+
208
+ def maximization(self):
209
+ """
210
+ Compute the maximization step of the EM algorithm.
211
+ """
212
+ self.update_transform()
213
+ self.transform_point_cloud()
214
+ self.update_variance()
215
+
216
+
217
+ class DeformableRegistration(EMRegistration):
218
+ """
219
+ Deformable registration.
220
+ Adapted from Pure Numpy Implementation of the Coherent Point Drift Algorithm:
221
+ https://github.com/siavashk/pycpd
222
+
223
+ Attributes
224
+ ----------
225
+ alpha: float (positive)
226
+ Represents the trade-off between the goodness of maximum likelihood fit and regularization.
227
+
228
+ beta: float(positive)
229
+ Width of the Gaussian kernel.
230
+
231
+ low_rank: bool
232
+ Whether to use low rank approximation.
233
+
234
+ num_eig: int
235
+ Number of eigenvectors to use in the low-rank calculation.
236
+ """
237
+
238
+ def __init__(self, alpha=None, beta=None, low_rank=False, num_eig=100, *args, **kwargs):
239
+ super().__init__(*args, **kwargs)
240
+ if alpha is not None and (not isinstance(alpha, numbers.Number) or alpha <= 0):
241
+ raise ValueError(
242
+ "Expected a positive value for regularization parameter alpha. Instead got: {}".format(alpha))
243
+
244
+ if beta is not None and (not isinstance(beta, numbers.Number) or beta <= 0):
245
+ raise ValueError(
246
+ "Expected a positive value for the width of the coherent Gaussian kernel. Instead got: {}".format(beta))
247
+
248
+ self.alpha = 2 if alpha is None else alpha
249
+ self.beta = 2 if beta is None else beta
250
+ self.W = np.zeros((self.M, self.D))
251
+ self.G = gaussian_kernel(self.Y, self.beta)
252
+ self.low_rank = low_rank
253
+ self.num_eig = num_eig
254
+ if self.low_rank is True:
255
+ self.Q, self.S = low_rank_eigen(self.G, self.num_eig)
256
+ self.inv_S = np.diag(1./self.S)
257
+ self.S = np.diag(self.S)
258
+ self.E = 0.
259
+
260
+ def update_transform(self):
261
+ """
262
+ Calculate a new estimate of the deformable transformation.
263
+ See Eq. 22 of https://arxiv.org/pdf/0905.2635.pdf.
264
+
265
+ """
266
+ if self.low_rank is False:
267
+ A = np.dot(np.diag(self.P1), self.G) + \
268
+ self.alpha * self.sigma2 * np.eye(self.M)
269
+ B = self.PX - np.dot(np.diag(self.P1), self.Y)
270
+ self.W = np.linalg.solve(A, B)
271
+
272
+ elif self.low_rank is True:
273
+ # Matlab code equivalent can be found here:
274
+ # https://github.com/markeroon/matlab-computer-vision-routines/tree/master/third_party/CoherentPointDrift
275
+ dP = np.diag(self.P1)
276
+ dPQ = np.matmul(dP, self.Q)
277
+ F = self.PX - np.matmul(dP, self.Y)
278
+
279
+ self.W = 1 / (self.alpha * self.sigma2) * (F - np.matmul(dPQ, (
280
+ np.linalg.solve((self.alpha * self.sigma2 * self.inv_S + np.matmul(self.Q.T, dPQ)),
281
+ (np.matmul(self.Q.T, F))))))
282
+ QtW = np.matmul(self.Q.T, self.W)
283
+ self.E = self.E + self.alpha / 2 * np.trace(np.matmul(QtW.T, np.matmul(self.S, QtW)))
284
+
285
+ def transform_point_cloud(self, Y=None):
286
+ """
287
+ Update a point cloud using the new estimate of the deformable transformation.
288
+
289
+ Attributes
290
+ ----------
291
+ Y: numpy array, optional
292
+ Array of points to transform - use to predict on new set of points.
293
+ Best for predicting on new points not used to run initial registration.
294
+ If None, self.Y is used.
295
+
296
+ Returns
297
+ -------
298
+ If Y is None, returns None.
299
+ Otherwise, returns the transformed Y.
300
+
301
+
302
+ """
303
+ # Zero the deformation in the feature (non-spatial) columns of W so that
+ # only the first two (spatial) coordinates are warped
+ self.W[:, 2:] = 0
304
+ if Y is not None:
305
+ G = gaussian_kernel(X=Y, beta=self.beta, Y=self.Y)
306
+ return Y + np.dot(G, self.W)
307
+ else:
308
+ if self.low_rank is False:
309
+ self.TY = self.Y + np.dot(self.G, self.W)
310
+
311
+ elif self.low_rank is True:
312
+ self.TY = self.Y + np.matmul(self.Q, np.matmul(self.S, np.matmul(self.Q.T, self.W)))
313
+ return
314
+
315
+
316
+ def update_variance(self):
317
+ """
318
+ Update the variance of the mixture model using the new estimate of the deformable transformation.
319
+ See the update rule for sigma2 in Eq. 23 of of https://arxiv.org/pdf/0905.2635.pdf.
320
+
321
+ """
322
+ qprev = self.sigma2
323
+
324
+ # The original CPD paper does not explicitly calculate the objective functional.
325
+ # This functional will include terms from both the negative log-likelihood and
326
+ # the Gaussian kernel used for regularization.
327
+ self.q = np.inf
328
+
329
+ xPx = np.dot(np.transpose(self.Pt1), np.sum(
330
+ np.multiply(self.X, self.X), axis=1))
331
+ yPy = np.dot(np.transpose(self.P1), np.sum(
332
+ np.multiply(self.TY, self.TY), axis=1))
333
+ trPXY = np.sum(np.multiply(self.TY, self.PX))
334
+
335
+ self.sigma2 = (xPx - 2 * trPXY + yPy) / (self.Np * self.D)
336
+
337
+ if self.sigma2 <= 0:
338
+ self.sigma2 = self.tolerance / 10
339
+
340
+ # Here we use the difference between the current and previous
341
+ # estimate of the variance as a proxy to test for convergence.
342
+ self.diff = np.abs(self.sigma2 - qprev)
343
+
344
+ def get_registration_parameters(self):
345
+ """
346
+ Return the current estimate of the deformable transformation parameters.
347
+
348
+
349
+ Returns
350
+ -------
351
+ self.G: numpy array
352
+ Gaussian kernel matrix.
353
+
354
+ self.W: numpy array
355
+ Deformable transformation matrix.
356
+ """
357
+ return self.G, self.W
358
+
359
+
360
+
361
+ def initialize_sigma2(X, Y):
362
+ """
363
+ Initialize the variance (sigma2).
364
+
365
+ Parameters
366
+ ----------
367
+ X: numpy array
368
+ NxD array of points for target.
369
+
370
+ Y: numpy array
371
+ MxD array of points for source.
372
+
373
+ Returns
374
+ -------
375
+ sigma2: float
376
+ Initial variance.
377
+ """
378
+ (N, D) = X.shape
379
+ (M, _) = Y.shape
380
+ diff = X[None, :, :] - Y[:, None, :]
381
+ err = diff ** 2
382
+ return np.sum(err) / (D * M * N)
383
+
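The initializer averages every pairwise squared coordinate difference over D·M·N. A minimal standalone check of that formula (re-implemented here, since the diff above is not importable as a module):

```python
import numpy as np

def initialize_sigma2(X, Y):
    # Mean of all pairwise squared coordinate differences, as in the function above
    (N, D) = X.shape
    (M, _) = Y.shape
    diff = X[None, :, :] - Y[:, None, :]
    return np.sum(diff ** 2) / (D * M * N)

X = np.array([[0.0, 0.0]])   # single target point at the origin
Y = np.array([[3.0, 4.0]])   # single source point at Euclidean distance 5
sigma2 = initialize_sigma2(X, Y)
# squared differences sum to 9 + 16 = 25; D*M*N = 2, so sigma2 = 12.5
```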
384
+
385
+
386
+ def gaussian_kernel(X, beta, Y=None):
387
+ """
388
+ Computes a Gaussian (RBF) kernel matrix between two sets of vectors.
389
+
390
+ :param X: A numpy array of shape (n_samples_X, n_features) representing the first set of vectors.
391
+ :param beta: The standard deviation parameter for the Gaussian kernel. It controls the spread of the kernel.
392
+ :param Y: An optional numpy array of shape (n_samples_Y, n_features) representing the second set of vectors.
393
+ If None, the function computes the kernel between `X` and itself (i.e., the Gram matrix).
394
+ :return: A numpy array of shape (n_samples_X, n_samples_Y) representing the Gaussian kernel matrix.
395
+ Each element (i, j) in the matrix is computed as:
396
+ `exp(-||X[i] - Y[j]||^2 / (2 * beta^2))`
397
+ """
398
+
399
+ # If Y is not provided, use X for both sets, computing the kernel matrix between X and itself
400
+ if Y is None:
401
+ Y = X
402
+
403
+ # Compute the difference tensor between each pair of vectors in X and Y
404
+ # The resulting shape is (n_samples_X, n_samples_Y, n_features)
405
+ diff = X[:, None, :] - Y[None, :, :]
406
+
407
+ # Square the differences element-wise
408
+ diff = np.square(diff)
409
+
410
+ # Sum the squared differences across the feature dimension (axis 2) to get squared Euclidean distances
411
+ # The resulting shape is (n_samples_X, n_samples_Y)
412
+ diff = np.sum(diff, axis=2)
413
+
414
+ # Apply the Gaussian (RBF) kernel formula: exp(-||X[i] - Y[j]||^2 / (2 * beta^2))
415
+ kernel_matrix = np.exp(-diff / (2 * beta**2))
416
+
417
+ return kernel_matrix
418
+
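A quick sanity check of the RBF formula above on two points one unit apart (re-implemented standalone; `beta=1` is just an illustrative value):

```python
import numpy as np

def gaussian_kernel(X, beta, Y=None):
    # exp(-||x_i - y_j||^2 / (2 * beta^2)), as in the function above
    if Y is None:
        Y = X
    sq_dists = np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
    return np.exp(-sq_dists / (2 * beta ** 2))

X = np.array([[0.0, 0.0], [1.0, 0.0]])
K = gaussian_kernel(X, beta=1.0)
# K[i, i] = exp(0) = 1 and K[0, 1] = exp(-1/2); the matrix is symmetric
```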
419
+
420
+
421
+ def low_rank_eigen(G, num_eig):
422
+ """
423
+ Calculate the top `num_eig` eigenvectors and eigenvalues of a given Gaussian matrix G.
424
+ This function is useful for dimensionality reduction or when a low-rank approximation is needed.
425
+
426
+ :param G: A square matrix (numpy array) for which the eigen decomposition is to be performed.
427
+ :param num_eig: The number of top eigenvectors and eigenvalues to return, based on the magnitude of eigenvalues.
428
+ :return: A tuple containing:
429
+ - Q: A numpy array with shape (n, num_eig) containing the top `num_eig` eigenvectors of the matrix `G`.
430
+ Each column in `Q` corresponds to an eigenvector.
431
+ - S: A numpy array of shape (num_eig,) containing the top `num_eig` eigenvalues of the matrix `G`.
432
+
433
+ """
434
+
435
+ # Perform eigen decomposition on matrix G
436
+ # `S` will contain all the eigenvalues, and `Q` will contain the corresponding eigenvectors
437
+ S, Q = np.linalg.eigh(G)
438
+
439
+ # Sort eigenvalues in descending order based on their absolute values
440
+ # Get the indices of the top `num_eig` largest eigenvalues
441
+ eig_indices = list(np.argsort(np.abs(S))[::-1][:num_eig])
442
+
443
+ # Select the corresponding top eigenvectors based on the sorted indices
444
+ Q = Q[:, eig_indices] # Q now contains the top `num_eig` eigenvectors
445
+
446
+ # Select the top `num_eig` eigenvalues based on the sorted indices
447
+ S = S[eig_indices] # S now contains the top `num_eig` eigenvalues
448
+
449
+ return Q, S
450
+
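The low-rank step keeps only the largest-magnitude eigenpairs; reconstructing from them drops the small modes. A standalone sketch on a matrix with a known spectrum:

```python
import numpy as np

def low_rank_eigen(G, num_eig):
    # Top-num_eig eigenpairs by absolute eigenvalue, as in the function above
    S, Q = np.linalg.eigh(G)
    idx = np.argsort(np.abs(S))[::-1][:num_eig]
    return Q[:, idx], S[idx]

# Symmetric matrix with eigenvalues 4.0, 1.0 and 0.1
G = np.diag([4.0, 1.0, 0.1])
Q, S = low_rank_eigen(G, num_eig=2)
# Rank-2 reconstruction keeps the 4.0 and 1.0 modes and zeroes out the 0.1 mode
G_approx = Q @ np.diag(S) @ Q.T
```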
451
+
452
+
453
+ def find_homography_translation_rotation(src_points, dst_points):
454
+ """
455
+ Find the homography between two sets of coordinates with only translation and rotation.
456
+
457
+ :param src_points: A numpy array of shape (n, 2) containing source coordinates.
458
+ :param dst_points: A numpy array of shape (n, 2) containing destination coordinates.
459
+ :return: A 3x3 homography matrix.
460
+ """
461
+ # Ensure the points are in the correct shape
462
+ assert src_points.shape == dst_points.shape
463
+ assert src_points.shape[1] == 2
464
+
465
+ # Calculate the centroids of the point sets
466
+ src_centroid = np.mean(src_points, axis=0)
467
+ dst_centroid = np.mean(dst_points, axis=0)
468
+
469
+ # Center the points around the centroids
470
+ centered_src_points = src_points - src_centroid
471
+ centered_dst_points = dst_points - dst_centroid
472
+
473
+ # Calculate the covariance matrix
474
+ H = np.dot(centered_src_points.T, centered_dst_points)
475
+
476
+ # Singular Value Decomposition (SVD)
477
+ U, S, Vt = np.linalg.svd(H)
478
+
479
+ # Calculate the rotation matrix
480
+ R = np.dot(Vt.T, U.T)
481
+
482
+ # Ensure a proper rotation matrix (det(R) = 1)
483
+ if np.linalg.det(R) < 0:
484
+ Vt[-1, :] *= -1
485
+ R = np.dot(Vt.T, U.T)
486
+
487
+ # Calculate the translation vector
488
+ t = dst_centroid - np.dot(R, src_centroid)
489
+
490
+ # Construct the homography matrix
491
+ homography_matrix = np.eye(3)
492
+ homography_matrix[0:2, 0:2] = R
493
+ homography_matrix[0:2, 2] = t
494
+
495
+ return homography_matrix
496
+
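The centroid/SVD procedure above is the Kabsch algorithm for a rigid 2D fit. A standalone check that it recovers a known rotation and translation exactly (the square and the 90-degree angle are arbitrary test values):

```python
import numpy as np

def find_homography_translation_rotation(src, dst):
    # Kabsch-style rigid fit (rotation + translation), as in the function above
    src_c, dst_c = src.mean(axis=0), dst.mean(axis=0)
    H = (src - src_c).T @ (dst - dst_c)
    U, _, Vt = np.linalg.svd(H)
    R = Vt.T @ U.T
    if np.linalg.det(R) < 0:          # guard against reflections
        Vt[-1, :] *= -1
        R = Vt.T @ U.T
    t = dst_c - R @ src_c
    M = np.eye(3)
    M[:2, :2], M[:2, 2] = R, t
    return M

# Rotate a unit square by 90 degrees and translate by (2, 3)
theta = np.pi / 2
R_true = np.array([[np.cos(theta), -np.sin(theta)],
                   [np.sin(theta),  np.cos(theta)]])
src = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
dst = src @ R_true.T + np.array([2.0, 3.0])
M = find_homography_translation_rotation(src, dst)
```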
497
+
498
+
499
+ def apply_homography(coordinates, H):
500
+ """
501
+ Apply a 3x3 homography matrix to 2D coordinates.
502
+
503
+ :param coordinates: A numpy array of shape (n, 2) containing 2D coordinates.
504
+ :param H: A numpy array of shape (3, 3) representing the homography matrix.
505
+ :return: A numpy array of shape (n, 2) with transformed coordinates.
506
+ """
507
+ # Convert (x, y) to homogeneous coordinates (x, y, 1)
508
+ n = coordinates.shape[0]
509
+ homogeneous_coords = np.hstack((coordinates, np.ones((n, 1))))
510
+
511
+ # Apply the homography matrix
512
+ transformed_homogeneous = np.dot(homogeneous_coords, H.T)
513
+
514
+ # Convert back from homogeneous coordinates (x', y', w') to (x'/w', y'/w')
515
+ transformed_coords = transformed_homogeneous[:, :2] / transformed_homogeneous[:, [2]]
516
+
517
+ return transformed_coords
518
+
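For a rigid homography the perspective divide is a no-op (w' stays 1), so a translation matrix simply shifts the points. A standalone check:

```python
import numpy as np

def apply_homography(coords, H):
    # Homogeneous transform followed by perspective divide, as in the function above
    n = coords.shape[0]
    homo = np.hstack((coords, np.ones((n, 1))))
    out = homo @ H.T
    return out[:, :2] / out[:, [2]]

# A pure translation by (5, -2) expressed as a 3x3 homography
H = np.array([[1.0, 0.0,  5.0],
              [0.0, 1.0, -2.0],
              [0.0, 0.0,  1.0]])
pts = np.array([[0.0, 0.0], [1.0, 1.0]])
moved = apply_homography(pts, H)
```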
519
+
520
+
521
+ def align_tissue(ad_tar_coor, ad_src_coor, pca_comb_features, src_img, alpha=0.5):
522
+ """
523
+ Aligns the source coordinates to the target coordinates using Coherent Point Drift (CPD)
524
+ registration, and applies a homography transformation to warp the source coordinates accordingly.
525
+
526
+ :param ad_tar_coor: Numpy array of target coordinates to which the source will be aligned.
527
+ :param ad_src_coor: Numpy array of source coordinates that will be aligned to the target.
528
+ :param pca_comb_features: PCA-combined feature matrix used as additional features for the alignment process.
529
+ :param src_img: Source image to be warped based on the alignment.
530
+ :param alpha: Regularization parameter for CPD registration, default is 0.5.
531
+ :return:
532
+ - cpd_coor: The new source coordinates after CPD alignment.
533
+ - homo_coor: The source coordinates after applying the homography transformation.
534
+ - aligned_image: The source image warped based on the homography transformation.
535
+ """
536
+
537
+ # Normalize target and source coordinates to the range [0, 1]
538
+ ad_tar_coor_z = (ad_tar_coor - ad_tar_coor.min()) / (ad_tar_coor.max() - ad_tar_coor.min())
539
+ ad_src_coor_z = (ad_src_coor - ad_src_coor.min()) / (ad_src_coor.max() - ad_src_coor.min())
540
+
541
+ # Normalize PCA-combined features to the range [0, 1]
542
+ pca_comb_features_z = (pca_comb_features - pca_comb_features.min()) / (pca_comb_features.max() - pca_comb_features.min())
543
+
544
+ # Concatenate spatial and PCA-combined features for target and source
545
+ target = np.concatenate((ad_tar_coor_z, pca_comb_features_z[:ad_tar_coor.shape[0], :2]), axis=1)
546
+ source = np.concatenate((ad_src_coor_z, pca_comb_features_z[ad_tar_coor.shape[0]:, :2]), axis=1)
547
+
548
+ # Initialize and run the CPD registration (deformable with regularization)
549
+ reg = DeformableRegistration(X=target, Y=source, low_rank=True,
550
+ alpha=alpha,
551
+ max_iterations=int(1e9), tolerance=1e-9)
552
+
553
+ TY = reg.register()[0] # TY contains the transformed source points
554
+
555
+ # Rescale the CPD-aligned coordinates back to the original range of target coordinates
556
+ cpd_coor = TY[:, :2] * (ad_tar_coor.max() - ad_tar_coor.min()) + ad_tar_coor.min()
557
+
558
+ # Find homography transformation based on CPD-aligned coordinates and apply it
559
+ h = find_homography_translation_rotation(ad_src_coor, cpd_coor)
560
+ homo_coor = apply_homography(ad_src_coor, h)
561
+
562
+ # Warp the source image using the computed homography
563
+ aligned_image = cv2.warpPerspective(src_img, h, (src_img.shape[1], src_img.shape[0]))
564
+
565
+ # Return the CPD-aligned coordinates, the homography-transformed coordinates, and the warped image
566
+ return cpd_coor, homo_coor, aligned_image
567
+
568
+
src/loki/annotate.py ADDED
@@ -0,0 +1,102 @@
1
+ import numpy as np
2
+ import torch
3
+ from torch.nn import functional as F
4
+ import os
5
+ import scanpy as sc
6
+ import json
7
+ import cv2
8
+
9
+
10
+
11
+ def annotate_with_bulk(img_features, bulk_features, normalize=True, T=1, tensor=False):
12
+ """
13
+ Annotates tissue image with similarity scores between image features and bulk RNA-seq features.
14
+
15
+ :param img_features: Feature matrix representing histopathology image features.
16
+ :param bulk_features: Feature vector representing bulk RNA-seq features.
17
+ :param normalize: Whether to normalize similarity scores, default=True.
18
+ :param T: Temperature parameter to control the sharpness of the softmax distribution. Higher values result in a smoother distribution.
19
+ :param tensor: Feature format in torch tensor or not, default=False.
20
+
21
+ :return: An array or tensor containing the normalized similarity scores.
22
+ """
23
+
24
+ if tensor:
25
+ # Compute similarity between image features and bulk RNA-seq features
26
+ cosine_similarity = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
27
+ similarity = cosine_similarity(img_features, bulk_features.unsqueeze(0)) # Shape: [n]
28
+
29
+ # Optional normalization using the feature vector's norm
30
+ if normalize:
31
+ normalization_factor = torch.sqrt(torch.tensor([bulk_features.shape[0]], dtype=torch.float)) # sqrt(feature_dim)
32
+ similarity = similarity / normalization_factor
33
+
34
+ # Reshape and apply temperature scaling for softmax
35
+ similarity = similarity.unsqueeze(0) # Shape: [1, n]
36
+ similarity = similarity / T # Control distribution sharpness
37
+
38
+ # Convert similarity scores to probability distribution using softmax
39
+ similarity = torch.nn.functional.softmax(similarity, dim=-1) # Shape: [1, n]
40
+
41
+ else:
42
+ # Compute similarity for non-tensor mode
43
+ similarity = np.dot(img_features.T, bulk_features)
44
+
45
+ # Apply a softmax-like normalization for numerical stability
46
+ max_similarity = np.max(similarity) # Maximum value for stability
47
+ similarity = np.exp(similarity - max_similarity) / np.sum(np.exp(similarity - max_similarity))
48
+
49
+ # Normalize similarity scores to [0, 1] range for interpretation
50
+ similarity = (similarity - np.min(similarity)) / (np.max(similarity) - np.min(similarity))
51
+
52
+ return similarity
53
+
54
+
55
+
56
+ def annotate_with_marker_genes(classes, image_embeddings, all_text_embeddings):
57
+ """
58
+ Annotates tissue image with similarity scores between image features and marker gene features.
59
+
60
+ :param classes: A list or array of tissue type labels.
61
+ :param image_embeddings: A numpy array or torch tensor of image embeddings (shape: [n_images, embedding_dim]).
62
+ :param all_text_embeddings: A numpy array or torch tensor of text embeddings of the marker genes
63
+ (shape: [n_classes, embedding_dim]).
64
+
65
+ :return:
66
+ - dot_similarity: The matrix of dot product similarities between image embeddings and text embeddings.
67
+ - pred_class: The predicted tissue type for the image based on the highest similarity score.
68
+ """
69
+
70
+ # Calculate dot product similarity between image embeddings and text embeddings
71
+ # This results in a similarity matrix of shape [n_images, n_classes]
72
+ dot_similarity = image_embeddings @ all_text_embeddings.T
73
+
74
+ # Find the class with the highest similarity for each image
75
+ # Use argmax to identify the index of the highest similarity score
76
+ pred_class = classes[dot_similarity.argmax()]
77
+
78
+ return dot_similarity, pred_class
79
+
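A minimal sketch of the dot-product classification above, with hypothetical class names and unit text embeddings (note `argmax` runs over the flattened similarity matrix, which is fine for a single image):

```python
import numpy as np

classes = np.array(["tumor", "stroma", "immune"])   # hypothetical tissue types
all_text_embeddings = np.eye(3)                     # one unit vector per class
image_embeddings = np.array([[0.1, 0.9, 0.0]])      # one image embedding

dot_similarity = image_embeddings @ all_text_embeddings.T
pred_class = classes[dot_similarity.argmax()]       # highest-scoring class wins
```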
80
+
81
+
82
+ def load_image_annotation(image_path):
83
+ """
84
+ Loads an image with annotation.
85
+
86
+ :param image_path: The file path to the image.
87
+
88
+ :return: The processed image, converted to BGR color space and of type uint8.
89
+ """
90
+
91
+ # Load the image from the specified file path using OpenCV
92
+ image = cv2.imread(image_path)
93
+
94
+ # Swap the channel order: OpenCV loads images as BGR, and COLOR_RGB2BGR performs the same B/R swap as BGR2RGB
95
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
96
+
97
+ # Ensure the image is of type uint8 for proper handling in OpenCV and other image processing libraries
98
+ image = image.astype(np.uint8)
99
+
100
+ return image
101
+
102
+
src/loki/decompose.py ADDED
@@ -0,0 +1,143 @@
1
+ import pandas as pd
2
+ import tangram as tg
3
+ import numpy as np
4
+ import torch
5
+ import anndata
6
+ from sklearn.decomposition import PCA
7
+ from sklearn.neighbors import NearestNeighbors
8
+
9
+
10
+
11
+ def generate_feature_ad(ad_expr, feature_path, sc=False):
12
+ """
13
+ Generates an AnnData object with OmiCLIP text or image embeddings.
14
+
15
+ :param ad_expr: AnnData object containing metadata for the dataset.
16
+ :param feature_path: Path to the CSV file containing the features to be loaded.
17
+ :param sc: Boolean flag indicating whether to copy single-cell metadata or ST metadata. Default is False (ST).
18
+ :return: A new AnnData object with the loaded features and relevant metadata from ad_expr.
19
+ """
20
+
21
+ # Load features from the CSV file. The index should match the cells/spots in ad_expr.obs.index.
22
+ features = pd.read_csv(feature_path, index_col=0)[ad_expr.obs.index]
23
+
24
+ # Create a new AnnData object with the features, transposing them to have cells/spots as rows
25
+ feature_ad = anndata.AnnData(features.T)  # features are already indexed by ad_expr.obs.index above
26
+
27
+ # Copy relevant metadata from ad_expr based on the sc flag
28
+ if sc:
29
+ # If the data is single-cell (sc), copy the metadata from ad_expr.obs
30
+ feature_ad.obs = ad_expr.obs.copy()
31
+ else:
32
+ # If the data is spatial, copy the 'cell_num', 'spatial' info, and spatial coordinates
33
+ feature_ad.obs['cell_num'] = ad_expr.obs['cell_num'].copy()
34
+ feature_ad.uns['spatial'] = ad_expr.uns['spatial'].copy()
35
+ feature_ad.obsm['spatial'] = ad_expr.obsm['spatial'].copy()
36
+
37
+ return feature_ad
38
+
39
+
40
+
41
+ def normalize_percentile(df, cols, min_percentile=5, max_percentile=95):
42
+ """
43
+ Clips and normalizes the specified columns of a DataFrame based on percentile thresholds,
44
+ transforming their values to the [0, 1] range.
45
+
46
+ :param df: A pandas DataFrame containing the columns to normalize.
47
+ :type df: pandas.DataFrame
48
+ :param cols: A list of column names in `df` that should be normalized.
49
+ :type cols: list[str]
50
+ :param min_percentile: The lower percentile used for clipping (defaults to 5).
51
+ :type min_percentile: float
52
+ :param max_percentile: The upper percentile used for clipping (defaults to 95).
53
+ :type max_percentile: float
54
+ :return: The same DataFrame with specified columns clipped and normalized.
55
+ :rtype: pandas.DataFrame
56
+ """
57
+
58
+ # Iterate over each column that needs to be normalized
59
+ for col in cols:
60
+ # Compute the lower and upper values at the given percentiles
61
+ min_val = np.percentile(df[col], min_percentile)
62
+ max_val = np.percentile(df[col], max_percentile)
63
+
64
+ # Clip the column's values between these percentile thresholds
65
+ df[col] = np.clip(df[col], min_val, max_val)
66
+
67
+ # Perform min-max normalization to scale the clipped values to the [0, 1] range
68
+ df[col] = (df[col] - min_val) / (max_val - min_val)
69
+
70
+ return df
71
+
72
+
73
+
74
+ def cell_type_decompose(sc_ad, st_ad, cell_type_col='cell_type', NMS_mode=False, major_types=None, min_percentile=5, max_percentile=95):
75
+ """
76
+ Performs cell type decomposition on spatial data (ST or image) with single-cell data.
77
+
78
+ :param sc_ad: AnnData object containing single-cell metadata.
79
+
80
+ :param st_ad: AnnData object containing spatial data (ST or image) metadata.
81
+ :param NMS_mode: Boolean flag to apply Non-Maximum Suppression (NMS) mode. Default is False.
82
+ :param major_types: Major cell types used for NMS mode. Default is None.
83
+ :param min_percentile: The lower percentile used for clipping (defaults to 5).
84
+ :param max_percentile: The upper percentile used for clipping (defaults to 95).
85
+ :return: The spatial AnnData object with projected cell type annotations.
86
+ """
87
+
88
+ # Preprocess the data for decomposition using tangram (tg)
89
+ tg.pp_adatas(sc_ad, st_ad, genes=None) # Preprocessing: match genes between single-cell and spatial data
90
+
91
+
92
+ # Map single-cell data to spatial data using Tangram's "map_cells_to_space" function
93
+ ad_map = tg.map_cells_to_space(
94
+ sc_ad, st_ad,
95
+ mode="clusters", # Map based on clusters (cell types)
96
+ cluster_label=cell_type_col, # Column in `sc_ad.obs` representing cell type
97
+ device='cpu', # Run on CPU (or 'cuda' if GPU is available)
98
+ scale=False, # Don't scale data (can be set to True if needed)
99
+ density_prior='uniform', # Assume a uniform prior over cell densities
100
+ random_state=10, # Set random state for reproducibility
101
+ verbose=False, # Disable verbose output for cleaner logging
102
+ )
103
+
104
+ # Project cell type annotations from the single-cell data to the spatial data
105
+ tg.project_cell_annotations(ad_map, st_ad, annotation=cell_type_col)
106
+
107
+
108
+ if NMS_mode:
109
110
+ st_ad.obs = normalize_percentile(st_ad.obsm['tangram_ct_pred'], major_types, min_percentile, max_percentile)
111
+
112
+ st_ad_binary = st_ad.obsm['tangram_ct_pred'][major_types].copy()
113
+ # Retain the max value in each row and set the rest to 0
114
+ st_ad.obs[major_types] = st_ad_binary.where(st_ad_binary.eq(st_ad_binary.max(axis=1), axis=0), other=0)
115
+
116
+ return st_ad # Return the spatial AnnData object with the projected annotations
117
+
118
+
119
+
120
+ def assign_cells_to_spots(cell_locs, spot_locs, patch_size=16):
121
+ """
122
+ Assigns cells to spots based on their spatial coordinates. Each cell within the specified patch size (radius)
123
+ of a spot will be assigned to that spot.
124
+
125
+ :param cell_locs: Numpy array of shape (n_cells, 2) with the x, y coordinates of the cells.
126
+ :param spot_locs: Numpy array of shape (n_spots, 2) with the x, y coordinates of the spots.
127
+ :param patch_size: The diameter of the spot patch. The radius used for assignment will be half of this value.
128
+ :return: A sparse matrix where each row corresponds to a cell and each column corresponds to a spot.
129
+ The value is 1 if the cell is assigned to that spot, 0 otherwise.
130
+ """
131
+ # Initialize the NearestNeighbors model with a radius equal to half the patch size
132
+ neigh = NearestNeighbors(radius=patch_size * 0.5)
133
+
134
+ # Fit the model on the spot locations
135
+ neigh.fit(spot_locs)
136
+
137
+ # Create the radius neighbors graph which will assign cells to spots based on proximity
138
+ # This graph is a sparse matrix where rows are cells and columns are spots, with a 1 indicating assignment
139
+ A = neigh.radius_neighbors_graph(cell_locs, mode='connectivity')
140
+
141
+ return A
142
+
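The proximity rule above is equivalent to thresholding pairwise distances at patch_size/2. A dense NumPy sketch of the same assignment (the real function returns a scipy sparse connectivity matrix via scikit-learn; the coordinates here are toy values):

```python
import numpy as np

cell_locs = np.array([[0.0, 0.0], [7.0, 0.0], [100.0, 100.0]])
spot_locs = np.array([[0.0, 0.0], [8.0, 0.0]])
patch_size = 16                                   # assignment radius = 8

dists = np.linalg.norm(cell_locs[:, None, :] - spot_locs[None, :, :], axis=2)
A = (dists <= patch_size * 0.5).astype(int)       # 1 where a cell lies inside a spot's radius
# Rows are cells, columns are spots; the far-away cell is assigned to nothing
```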
143
+
src/loki/plot.py ADDED
@@ -0,0 +1,435 @@
1
+ import matplotlib.pyplot as plt
2
+ from pathlib import Path
3
+ import json
4
+ import cv2
5
+ from matplotlib import cm
6
+ import pandas as pd
7
+ import numpy as np
8
+ from tqdm import tqdm
9
+
10
+
11
+
12
+ def plot_alignment(ad_tar_coor, ad_src_coor, homo_coor, pca_hex_comb, tar_features, shift=300, s=0.8, boundary_line=True):
13
+ """
14
+ Plots the target coordinates and alignment of source coordinates.
15
+
16
+ :param ad_tar_coor: Numpy array of target coordinates to be plotted in the first subplot.
17
+ :param ad_src_coor: Numpy array of source coordinates to be plotted in the second subplot.
18
+ :param homo_coor: Numpy array of alignment of source coordinates to be plotted in the third subplot.
19
+ :param pca_hex_comb: Color values (e.g., PCA or hex values) for plotting the coordinates.
20
+ :param tar_features: Feature matrix for the target, used to split color values between target and source data.
21
+ :param shift: Value used to adjust the plot limits around the coordinates for better visualization. Default is 300.
22
+ :param s: Marker size for the scatter plot points. Default is 0.8.
23
+ :param boundary_line: Boolean indicating whether to draw boundary lines (horizontal and vertical lines). Default is True.
24
+ :return: Displays the alignment plot of target, source, and alignment of source coordinates.
25
+ """
26
+
27
+ # Create a figure with three subplots, adjusting size and resolution
28
+ plt.figure(figsize=(10, 3), dpi=300)
29
+
30
+ # First subplot: Plot target coordinates
31
+ plt.subplot(1, 3, 1)
32
+ plt.scatter(ad_tar_coor[:, 0], ad_tar_coor[:, 1], marker='o', s=s, c=pca_hex_comb[:len(tar_features.T)])
33
+ # Set plot limits based on the minimum and maximum target coordinates, with extra padding from 'shift'
34
+ plt.xlim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
35
+ plt.ylim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
36
+
37
+ # Second subplot: Plot source coordinates
38
+ plt.subplot(1, 3, 2)
39
+ plt.scatter(ad_src_coor[:, 0], ad_src_coor[:, 1], marker='o', s=s, c=pca_hex_comb[len(tar_features.T):])
40
+ # Ensure consistent plot limits across subplots by using the same limits as the target coordinates
41
+ plt.xlim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
42
+ plt.ylim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
43
+
44
+ # Third subplot: Plot alignment of source coordinates
45
+ plt.subplot(1, 3, 3)
46
+ plt.scatter(homo_coor[:, 0], homo_coor[:, 1], marker='o', s=s, c=pca_hex_comb[len(tar_features.T):])
47
+ # Maintain the same plot limits across all subplots for a uniform comparison
48
+ plt.xlim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
49
+ plt.ylim([ad_tar_coor.min() - shift, ad_tar_coor.max() + shift])
50
+
51
+ # Optionally draw boundary lines at the minimum x and y values of the target coordinates
52
+ if boundary_line:
53
+ plt.axvline(x=ad_tar_coor[:, 0].min(), color='black') # Vertical boundary line at the minimum x of target coordinates
54
+ plt.axhline(y=ad_tar_coor[:, 1].min(), color='black') # Horizontal boundary line at the minimum y of target coordinates
55
+
56
+ # Remove the axis labels and ticks from all subplots for a cleaner appearance
57
+ for ax in plt.gcf().axes:
+ ax.axis('off')
58
+
59
+ # Display the plot
60
+ plt.show()
61
+
62
+
63
+
64
+ def plot_alignment_with_img(ad_tar_coor, ad_src_coor, homo_coor, tar_img, src_img, aligned_image, pca_hex_comb, tar_features):
65
+ """
66
+ Plots the target coordinates and alignment of source coordinates with their respective images in the background.
67
+
68
+ :param ad_tar_coor: Numpy array of target coordinates to be plotted in the first and third subplots.
69
+ :param ad_src_coor: Numpy array of source coordinates to be plotted in the second subplot.
70
+ :param homo_coor: Numpy array of alignment of source coordinates to be plotted in the third subplot.
71
+ :param tar_img: Image associated with the target coordinates, used as the background in the first subplot.
72
+ :param src_img: Image associated with the source coordinates, used as the background in the second subplot.
73
+ :param aligned_image: Image associated with the aligned coordinates, used as the background in the third subplot.
74
+ :param pca_hex_comb: Color values (e.g., PCA or hex values) for plotting the coordinates.
75
+ :param tar_features: Feature matrix for the target, used to split color values between target and source data.
76
+ :return: Displays the alignment plot of target, source, and alignment of source coordinates with their associated images.
77
+ """
78
+
79
+ # Create a figure with three subplots and set the size and resolution
80
+ plt.figure(figsize=(10, 8), dpi=150)
81
+
82
+ # First subplot: Plot target coordinates with the target image as the background
83
+ plt.subplot(1, 3, 1)
84
+ # Scatter plot for the target coordinates with transparency and small marker size
85
+ plt.scatter(ad_tar_coor[:, 0], ad_tar_coor[:, 1], marker='o', alpha=0.8, s=1, c=pca_hex_comb[:len(tar_features.T)])
86
+ # Overlay the target image with some transparency (alpha = 0.3)
87
+ plt.imshow(tar_img, origin='lower', alpha=0.3)
88
+
89
+ # Second subplot: Plot source coordinates with the source image as the background
90
+ plt.subplot(1, 3, 2)
91
+ # Scatter plot for the source coordinates with transparency and small marker size
92
+ plt.scatter(ad_src_coor[:, 0], ad_src_coor[:, 1], marker='o', alpha=0.8, s=1, c=pca_hex_comb[len(tar_features.T):])
93
+ # Overlay the source image with some transparency (alpha = 0.3)
94
+ plt.imshow(src_img, origin='lower', alpha=0.3)
95
+
96
+ # Third subplot: Plot both target and alignment of source coordinates with the aligned image as the background
97
+ plt.subplot(1, 3, 3)
98
+ # Scatter plot for the target coordinates with lower opacity (alpha = 0.2)
99
+ plt.scatter(ad_tar_coor[:, 0], ad_tar_coor[:, 1], marker='o', alpha=0.2, s=1, c=pca_hex_comb[:len(tar_features.T)])
100
+ # Scatter plot for the homologous coordinates with a '+' marker and the same color mapping
101
+ plt.scatter(homo_coor[:, 0], homo_coor[:, 1], marker='+', s=1, c=pca_hex_comb[len(tar_features.T):])
102
+ # Overlay the aligned image with some transparency (alpha = 0.3)
103
+ plt.imshow(aligned_image, origin='lower', alpha=0.3)
104
+
105
+ # Turn off the axis for all subplots to give a cleaner visual output
106
+ for ax in plt.gcf().axes:
+ ax.axis('off')
107
+
108
+ # Display the plots
109
+ plt.show()
110
+
111
+
112
+
113
+ def draw_polygon(image, polygon, color='k', thickness=2):
114
+ """
115
+ Draws one or more polygons on the given image.
116
+
117
+ :param image: The image on which to draw the polygons (as a numpy array).
118
+ :param polygon: A list of polygons, where each polygon is a list of (x, y) coordinate tuples.
119
+ :param color: A string or list of strings representing the color(s) for each polygon.
120
+ If a single color is provided, it will be applied to all polygons. Default is 'k' (black).
121
+ :param thickness: An integer or a list of integers representing the thickness of the polygon borders.
122
+ If a single value is provided, it will be applied to all polygons. Default is 2.
123
+
124
+ :return: The image with the polygons drawn on it.
125
+ """
126
+
127
+ # If the provided `color` is a single value (string), convert it to a list of the same color for each polygon
128
+ if not isinstance(color, list):
129
+ color = [color] * len(polygon) # Create a list where each polygon gets the same color
130
+
131
+ # Loop through each polygon in the list, along with its corresponding color
132
+ for i, poly in enumerate(polygon):
133
+ # Get the color for the current polygon
134
+ c = color[i]
135
+
136
+ # Convert the color from a string format (e.g., 'k' or '#ff0000') to an RGB tuple
137
+ c = color_string_to_rgb(c)
138
+
139
+ # Get the thickness value for the current polygon (if a list is provided, use the corresponding value)
140
+ t = thickness[i] if isinstance(thickness, list) else thickness
141
+
142
+ # Convert the polygon coordinates to a numpy array of integers
143
+ poly = np.array(poly, np.int32)
144
+
145
+ # Reshape the polygon array to match OpenCV's expected input format: (number of points, 1, 2)
146
+ poly = poly.reshape((-1, 1, 2))
147
+
148
+ # Draw the polygon on the image using OpenCV's `cv2.polylines` function
149
+ # `isClosed=True` indicates that the polygon should be closed (start and end points are connected)
150
+ image = cv2.polylines(image, [poly], isClosed=True, color=c, thickness=t)
151
+
152
+ return image
153
+
154
+
155
+
156
+ def blend_images(image1, image2, alpha=0.5):
157
+ """
158
+ Blends two images together.
159
+
160
+ :param image1: Background image, a numpy array of shape (H, W, 3), where H is height, W is width, and 3 represents the RGB color channels.
161
+ :param image2: Foreground image, a numpy array of shape (H, W, 3), same dimensions as image1.
162
+ :param alpha: Blending factor, a float between 0 and 1. The value of alpha determines the weight of image1 in the blend,
163
+ where 0 means only image2 is shown, and 1 means only image1 is shown. Default is 0.5 (equal blending).
164
+
165
+ :return: A blended image, where each pixel is a weighted combination of the corresponding pixels from image1 and image2.
166
+ The blending is computed as: `blended = alpha * image1 + (1 - alpha) * image2`.
167
+ """
168
+
169
+ # Use cv2.addWeighted to blend the two images.
170
+ # The first image (image1) is weighted by 'alpha', and the second image (image2) is weighted by '1 - alpha'.
171
+ blended = cv2.addWeighted(image1, alpha, image2, 1 - alpha, 0)
172
+
173
+ # Return the resulting blended image.
174
+ return blended
175
+
176
+
177
+
178
+ def color_string_to_rgb(color_string):
179
+ """
180
+ Converts a color string to an RGB tuple.
181
+
182
+ :param color_string: A string representing the color. This can be in hexadecimal form (e.g., '#ff0000') or
183
+ a shorthand character for basic colors (e.g., 'k' for black, 'r' for red, etc.).
184
+ :return:
185
+ A tuple (r, g, b) representing the RGB values of the color, where each value is an integer between 0 and 255.
186
+ :raises:
187
+ ValueError: If the color string is not recognized.
188
+ """
189
+
190
+ # Remove any spaces in the color string
191
+ color_string = color_string.replace(' ', '')
192
+
193
+ # If the string starts with a '#', it's a hexadecimal color, so we remove the '#'
194
+ if color_string.startswith('#'):
195
+ color_string = color_string[1:]
196
+ else:
197
+ # Handle shorthand single-letter color codes by converting them to hex values
198
+ # 'k' -> black, 'r' -> red, 'g' -> green, 'b' -> blue, 'w' -> white
199
+ if color_string == 'k': # Black
200
+ color_string = '000000'
201
+ elif color_string == 'r': # Red
202
+ color_string = 'ff0000'
203
+ elif color_string == 'g': # Green
204
+ color_string = '00ff00'
205
+ elif color_string == 'b': # Blue
206
+ color_string = '0000ff'
207
+ elif color_string == 'w': # White
208
+ color_string = 'ffffff'
209
+ else:
210
+ # Raise an error if the color string is not recognized
211
+ raise ValueError(f"Unknown color string {color_string}")
212
+
213
+ # Convert the first two characters to the red (R) value
214
+ r = int(color_string[:2], 16)
215
+
216
+ # Convert the next two characters to the green (G) value
217
+ g = int(color_string[2:4], 16)
218
+
219
+ # Convert the last two characters to the blue (B) value
220
+ b = int(color_string[4:], 16)
221
+
222
+ # Return the RGB values as a tuple
223
+ return (r, g, b)
224
+
225
+
226
+
227
+ def plot_heatmap(
228
+ coor,
229
+ similarity,
230
+ image_path=None,
231
+ patch_size=(256, 256),
232
+ save_path=None,
233
+ downsize=32,
234
+ cmap='turbo',
235
+ smooth=False,
236
+ boxes=None,
237
+ box_color='k',
238
+ box_thickness=2,
239
+ polygons=None,
240
+ polygons_color='k',
241
+ polygons_thickness=2,
242
+ image_alpha=0.5
243
+ ):
244
+ """
245
+ Plots a heatmap overlaid on an image based on given coordinates and similarity values.
246
+
247
+ :param coor: Array of coordinates (N, 2) where N is the number of patches to place on the heatmap.
248
+ :param similarity: Array of similarity values (N,) corresponding to the coordinates. These values are mapped to colors using a colormap.
249
+ :param image_path: Path to the background image on which the heatmap will be overlaid.
250
+ :param patch_size: Size of each patch in pixels (default is 256x256).
251
+ :param save_path: Path to save the heatmap image. If None, the heatmap is returned instead of being saved.
252
+ :param downsize: Factor to downsize the image and patches for faster processing. Default is 32.
253
+ :param cmap: Colormap used to map the similarity values to colors. Default is 'turbo'.
254
+ :param smooth: Boolean to indicate if the heatmap should be smoothed. Not implemented in this version.
255
+ :param boxes: List of boxes in (x, y, w, h) format. Not used in this version.
257
+ :param box_color: Color of the boxes. Default is black ('k'). Not used in this version.
258
+ :param box_thickness: Thickness of the box outlines. Not used in this version.
258
+ :param polygons: List of polygons (N, 2) to draw on the heatmap.
259
+ :param polygons_color: Color of the polygon outlines. Default is black ('k').
260
+ :param polygons_thickness: Thickness of the polygon outlines.
261
+ :param image_alpha: Transparency value (0 to 1) for blending the heatmap with the original image. Default is 0.5.
262
+
263
+ :return:
264
+ - heatmap: The generated heatmap as a numpy array (RGB).
265
+ - image: The original image with overlaid polygons if provided.
266
+ """
267
+
268
+ # Read the background image from disk
269
+ image = cv2.imread(image_path)
270
+ image_size = (image.shape[0], image.shape[1]) # Get the size of the image
271
+ coor = [(x // downsize, y // downsize) for x, y in coor] # Downsize the coordinates for faster processing
272
+ patch_size = (patch_size[0] // downsize, patch_size[1] // downsize) # Downsize the patch size
273
+
274
+ # Convert similarity values to colors using the provided colormap
275
+ cmap = plt.get_cmap(cmap) # Get the colormap object
276
+ norm = plt.Normalize(vmin=similarity.min(), vmax=similarity.max()) # Normalize similarity values to map to color range
277
+ colors = cmap(norm(similarity)) # Convert the normalized similarity values to RGB colors
278
+
279
+ # Initialize a blank white heatmap the size of the image
280
+ heatmap = np.ones((image_size[0], image_size[1], 3)) * 255 # Start with a white background
281
+
282
+ # Place the colored patches on the heatmap according to the coordinates and patch size
283
+ for i in range(len(coor)):
284
+ x, y = coor[i]
285
+ w = colors[i][:3] * 255 # Get the RGB color for the patch, scaling from [0, 1] to [0, 255]
286
+ w = w.astype(np.uint8) # Convert the color to uint8
287
+ heatmap[y:y + patch_size[0], x:x + patch_size[1], :] = w # Place the patch on the heatmap
288
+
289
+ # If the image_alpha is greater than 0, blend the heatmap with the original image
290
+ if image_alpha > 0:
291
+ image = np.array(image)
292
+
293
+ # Pad the image if necessary to match the heatmap size
294
+ if image.shape[0] < heatmap.shape[0]:
295
+ pad = heatmap.shape[0] - image.shape[0]
296
+ image = np.pad(image, ((0, pad), (0, 0), (0, 0)), mode='constant', constant_values=255)
297
+ if image.shape[1] < heatmap.shape[1]:
298
+ pad = heatmap.shape[1] - image.shape[1]
299
+ image = np.pad(image, ((0, 0), (0, pad), (0, 0)), mode='constant', constant_values=255)
300
+
301
+ # Swap the R and B channels (cv2.imread returns BGR) so the image matches the RGB heatmap, then blend
302
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
303
+ image = image.astype(np.uint8)
304
+ heatmap = heatmap.astype(np.uint8)
305
+ heatmap = blend_images(heatmap, image, alpha=image_alpha) # Blend the heatmap and the image
306
+
307
+ # If polygons are provided, draw them on the heatmap and image
308
+ if polygons is not None:
309
+ polygons = [poly // downsize for poly in polygons] # Downsize the polygon coordinates
310
+ image_polygons = draw_polygon(image, polygons, color=polygons_color, thickness=polygons_thickness) # Draw polygons on the original image
311
+ heatmap_polygons = draw_polygon(heatmap, polygons, color=polygons_color, thickness=polygons_thickness) # Draw polygons on the heatmap
312
+
313
+ return heatmap_polygons, image_polygons # Return the heatmap and image with polygons drawn on them
314
+ else:
315
+ return heatmap, image # Return the heatmap and image
316
+
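The core of `plot_heatmap` — normalizing scores and stamping colored patches onto a white canvas — can be sketched without matplotlib or OpenCV. The helper below is hypothetical (a linear gray ramp stands in for the 'turbo' colormap) and is meant only to illustrate the placement loop:

```python
import numpy as np

def stamp_heatmap(coords, scores, canvas_size, patch_size):
    # Normalize scores to [0, 1], then stamp one gray patch per coordinate
    # on a white canvas, mirroring the placement loop in plot_heatmap.
    lo, hi = scores.min(), scores.max()
    norm = (scores - lo) / (hi - lo) if hi > lo else np.zeros_like(scores)
    canvas = np.full((*canvas_size, 3), 255, dtype=np.uint8)
    ph, pw = patch_size
    for (x, y), v in zip(coords, norm):
        gray = np.uint8(255 * v)
        canvas[y:y + ph, x:x + pw] = (gray, gray, gray)
    return canvas

canvas = stamp_heatmap(coords=[(0, 0), (4, 4)],
                       scores=np.array([0.2, 0.9]),
                       canvas_size=(8, 8), patch_size=(4, 4))
print(canvas[0, 0])  # lowest score -> black patch: [0 0 0]
print(canvas[5, 5])  # highest score -> white patch: [255 255 255]
```

Note that, as in `plot_heatmap`, coordinates are (x, y) pairs while the canvas is indexed row-first (`canvas[y:..., x:...]`).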
317
+
318
+
319
+ def show_images_side_by_side(image1, image2, title1=None, title2=None):
320
+ """
321
+ Displays two images side by side in a single figure.
322
+
323
+ :param image1: The first image to display (as a numpy array).
324
+ :param image2: The second image to display (as a numpy array).
325
+ :param title1: The title for the first image. Default is None (no title).
326
+ :param title2: The title for the second image. Default is None (no title).
327
+ :return: Displays the images side by side.
328
+ """
329
+
330
+ # Create a figure with 2 subplots (1 row, 2 columns), and set the figure size
331
+ fig, ax = plt.subplots(1, 2, figsize=(15,8))
332
+
333
+ # Display the first image on the first subplot
334
+ ax[0].imshow(image1)
335
+
336
+ # Display the second image on the second subplot
337
+ ax[1].imshow(image2)
338
+
339
+ # Set the title for the first image (if provided)
340
+ ax[0].set_title(title1)
341
+
342
+ # Set the title for the second image (if provided)
343
+ ax[1].set_title(title2)
344
+
345
+ # Remove axis labels and ticks for both images to give a cleaner look
346
+ ax[0].axis('off')
347
+ ax[1].axis('off')
348
+
349
+ # Show the final figure with both images displayed side by side
350
+ plt.show()
351
+
352
+
353
+
354
+ def plot_img_with_annotation(fullres_img, roi_polygon, linewidth, xlim, ylim):
355
+ """
356
+ Plots image with polygons.
357
+
358
+ :param fullres_img: The full-resolution image to display (as a numpy array).
359
+ :param roi_polygon: A list of polygons, where each polygon is a list of (x, y) coordinate tuples.
360
+ :param linewidth: The thickness of the lines used to draw the polygons.
361
+ :param xlim: A tuple (xmin, xmax) defining the x-axis limits for zooming in on a specific region of the image.
362
+ :param ylim: A tuple (ymin, ymax) defining the y-axis limits for zooming in on a specific region of the image.
363
+ :return: Displays the image with ROI polygons overlaid.
364
+ """
365
+
366
+ # Create a new figure with a fixed size for displaying the image and annotations
367
+ plt.figure(figsize=(10, 10))
368
+
369
+ # Display the full-resolution image
370
+ plt.imshow(fullres_img)
371
+
372
+ # Loop through each polygon in roi_polygon and plot them on the image
373
+ for polygon in roi_polygon:
374
+ x, y = zip(*polygon) # Unzip the list of (x, y) tuples into separate x and y coordinate lists
375
+ plt.plot(x, y, color='black', linewidth=linewidth) # Plot the polygon using the specified linewidth
376
+
377
+ # Set the x-axis limits based on the provided tuple (xlim)
378
+ plt.xlim(xlim)
379
+
380
+ # Set the y-axis limits based on the provided tuple (ylim)
381
+ plt.ylim(ylim)
382
+
383
+ # Invert the y-axis to match the typical image display convention (origin at the top-left)
384
+ plt.gca().invert_yaxis()
385
+
386
+ # Turn off the axis to give a cleaner image display without ticks or labels
387
+ plt.axis('off')
388
+
389
+
390
+
391
+ def plot_annotation_heatmap(st_ad, roi_polygon, s, linewidth, xlim, ylim):
392
+ """
393
+ Plots tissue type annotation heatmap.
394
+
395
+ :param st_ad: AnnData object containing coordinates in `obsm['spatial']`
396
+ and similarity scores in `obs['bulk_simi']`.
397
+ :param roi_polygon: A list of polygons, where each polygon is a list of (x, y) coordinate tuples.
398
+ :param s: The size of the scatter plot markers representing each spatial transcriptomics spot.
399
+ :param linewidth: The thickness of the lines used to draw the polygons.
400
+ :param xlim: A tuple (xmin, xmax) defining the x-axis limits for zooming in on a specific region of the image.
401
+ :param ylim: A tuple (ymin, ymax) defining the y-axis limits for zooming in on a specific region of the image.
402
+ :return: Displays the heatmap with polygons overlaid.
403
+ """
404
+
405
+ # Create a new figure with a fixed size for displaying the heatmap and annotations
406
+ plt.figure(figsize=(10, 10))
407
+
408
+ # Scatter plot for the spatial transcriptomics data.
409
+ # The 'spatial' coordinates are plotted with color intensity based on 'bulk_simi' values.
410
+ plt.scatter(
411
+ st_ad.obsm['spatial'][:, 0], st_ad.obsm['spatial'][:, 1], # x and y coordinates
412
+ c=st_ad.obs['bulk_simi'], # Color values based on 'bulk_simi'
413
+ s=s, # Size of each marker
414
+ vmin=0.1, vmax=0.95, # Set the range for the color normalization
415
+ cmap='turbo' # Use the 'turbo' colormap for the heatmap
416
+ )
417
+
418
+ # Loop through each polygon in roi_polygon and plot them on the image
419
+ for polygon in roi_polygon:
420
+ x, y = zip(*polygon) # Unzip the list of (x, y) tuples into separate x and y coordinate lists
421
+ plt.plot(x, y, color='black', linewidth=linewidth) # Plot the polygon using the specified linewidth
422
+
423
+ # Set the x-axis limits based on the provided tuple (xlim)
424
+ plt.xlim(xlim)
425
+
426
+ # Set the y-axis limits based on the provided tuple (ylim)
427
+ plt.ylim(ylim)
428
+
429
+ # Invert the y-axis to match the typical image display convention (origin at the top-left)
430
+ plt.gca().invert_yaxis()
431
+
432
+ # Turn off the axis to give a cleaner image display without ticks or labels
433
+ plt.axis('off')
434
+
435
+
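The color and blending helpers above can be exercised without OpenCV. The sketch below mirrors `color_string_to_rgb` and replaces `cv2.addWeighted` with the equivalent NumPy arithmetic — a minimal re-implementation for illustration, not the module's API:

```python
import numpy as np

def color_string_to_rgb(color_string):
    """Convert 'k'/'r'/'g'/'b'/'w' or '#rrggbb' to an (r, g, b) tuple."""
    shorthand = {'k': '000000', 'r': 'ff0000', 'g': '00ff00',
                 'b': '0000ff', 'w': 'ffffff'}
    color_string = color_string.replace(' ', '')
    if color_string.startswith('#'):
        color_string = color_string[1:]
    elif color_string in shorthand:
        color_string = shorthand[color_string]
    else:
        raise ValueError(f"Unknown color string {color_string}")
    return tuple(int(color_string[i:i + 2], 16) for i in (0, 2, 4))

def blend(image1, image2, alpha=0.5):
    """NumPy equivalent of cv2.addWeighted(image1, alpha, image2, 1 - alpha, 0)."""
    out = alpha * image1.astype(np.float64) + (1 - alpha) * image2.astype(np.float64)
    return np.clip(np.rint(out), 0, 255).astype(np.uint8)

red = np.full((4, 4, 3), color_string_to_rgb('r'), dtype=np.uint8)
white = np.full((4, 4, 3), color_string_to_rgb('w'), dtype=np.uint8)
mix = blend(red, white, alpha=0.5)
print(mix[0, 0])  # equal-weight blend of red and white: [255 128 128]
```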
src/loki/predex.py ADDED
@@ -0,0 +1,25 @@
1
+ import pandas as pd
2
+
3
+
4
+
5
+ def predict_st_gene_expr(image_text_similarity, train_data):
6
+ """
7
+ Predicts ST gene expression by H&E image.
8
+
9
+ :param image_text_similarity: Numpy array of similarities between image and text features (shape: [n_samples, n_references]).
10
+ :param train_data: Numpy array or DataFrame of reference expression profiles used for prediction (shape: [n_references, n_genes]).
11
+ :return: Numpy array or DataFrame containing the predicted gene expression levels for the samples.
12
+ """
13
+
14
+ # Compute the weighted sum of the train_data using image_text_similarity
15
+ weighted_sum = image_text_similarity @ train_data
16
+
17
+ # Compute the normalization factor (sum of the image-text similarities for each sample)
18
+ weights = image_text_similarity.sum(axis=1, keepdims=True)
19
+
20
+ # Normalize the predicted matrix to get weighted gene expression predictions
21
+ predicted_image_text_matrix = weighted_sum / weights
22
+
23
+ return predicted_image_text_matrix
24
+
25
+
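The weighted-average prediction above reduces to two matrix operations. A self-contained sketch with toy numbers (the arrays are invented for illustration):

```python
import numpy as np

def predict_st_gene_expr(image_text_similarity, train_data):
    # Weighted average of reference expression profiles,
    # where each sample's weights are its image-text similarities.
    weighted_sum = image_text_similarity @ train_data
    weights = image_text_similarity.sum(axis=1, keepdims=True)
    return weighted_sum / weights

similarity = np.array([[1.0, 1.0], [0.0, 2.0]])   # 2 samples x 2 references
reference = np.array([[1.0, 0.0], [3.0, 4.0]])    # 2 references x 2 genes
pred = predict_st_gene_expr(similarity, reference)
print(pred)  # sample 1 averages both references; sample 2 copies reference 2
```

Since each row of weights sums out of the product, every predicted row is a convex combination of the reference rows.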
src/loki/preprocess.py ADDED
@@ -0,0 +1,324 @@
1
+ import scanpy as sc
2
+ import numpy as np
3
+ import pandas as pd
4
+ import json
5
+ import os
6
+ from PIL import Image
+ import logging
+
+ logger = logging.getLogger(__name__)
7
+
8
+
9
+
10
+ def generate_gene_df(ad, house_keeping_genes, todense=True):
11
+ """
12
+ Generates a DataFrame with the top 50 genes for each observation in an AnnData object.
13
+ It removes genes containing '.' or '-' in their names, as well as genes listed in
14
+ the provided `house_keeping_genes` DataFrame/Series under the 'genesymbol' column.
15
+
16
+ :param ad: An AnnData object containing gene expression data.
17
+ :type ad: anndata.AnnData
18
+ :param house_keeping_genes: DataFrame or Series with a 'genesymbol' column listing housekeeping genes to exclude.
19
+ :type house_keeping_genes: pandas.DataFrame or pandas.Series
20
+ :param todense: Whether to convert the sparse matrix (ad.X) to a dense matrix before creating a DataFrame.
21
+ :type todense: bool
22
+ :return: A DataFrame (`top_k_genes_str`) that contains a 'label' column. Each row in 'label' is a string
23
+ with the top 50 gene names (space-separated) for that observation.
24
+ :rtype: pandas.DataFrame
25
+ """
26
+
27
+ # Remove genes containing '.' in their names
28
+ ad = ad[:, ~ad.var.index.str.contains('.', regex=False)]
29
+ # Remove genes containing '-'
30
+ ad = ad[:, ~ad.var.index.str.contains('-', regex=False)]
31
+ # Exclude housekeeping genes
32
+ ad = ad[:, ~ad.var.index.isin(house_keeping_genes['genesymbol'])]
33
+
34
+ # Convert to dense if requested; otherwise use the data as-is
35
+ if todense:
36
+ expr = pd.DataFrame(ad.X.todense(), index=ad.obs.index, columns=ad.var.index)
37
+ else:
38
+ expr = pd.DataFrame(ad.X, index=ad.obs.index, columns=ad.var.index)
39
+
40
+ # For each row (observation), find the top 50 genes with the highest expression
41
+ top_k_genes = expr.apply(lambda s, n: pd.Series(s.nlargest(n).index), axis=1, n=50)
42
+
43
+ # Create a new DataFrame to store the labels (space-separated top gene names)
44
+ top_k_genes_str = pd.DataFrame()
45
+ top_k_genes_str['label'] = top_k_genes[top_k_genes.columns].astype(str) \
46
+ .apply(lambda x: ' '.join(x), axis=1)
47
+
48
+ return top_k_genes_str
49
+
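The per-row top-k step in `generate_gene_df` can be demonstrated on a toy expression table (gene names and values are invented; k is reduced from 50 to 2 for the demo):

```python
import pandas as pd

# Tiny expression matrix: rows are spots, columns are genes.
expr = pd.DataFrame(
    {'GATA3': [5.0, 0.0], 'KRT8': [3.0, 1.0], 'CD3E': [1.0, 9.0]},
    index=['spot1', 'spot2'],
)
k = 2
# For each row, take the names of the k highest-expressed genes...
top_k = expr.apply(lambda s: pd.Series(s.nlargest(k).index), axis=1)
# ...and join them into one space-separated label string per spot.
labels = top_k.astype(str).apply(' '.join, axis=1)
print(labels['spot1'])  # 'GATA3 KRT8'
print(labels['spot2'])  # 'CD3E KRT8'
```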
50
+
51
+
52
+ def segment_patches(img_array, coord, patch_dir, height=20, width=20):
53
+ """
54
+ Extracts small image patches centered at specified coordinates and saves them as individual PNG files.
55
+
56
+ :param img_array: A NumPy array representing the full-resolution image. Shape is expected to be (H, W[, C]).
57
+ :type img_array: numpy.ndarray
58
+ :param coord: A pandas DataFrame containing patch center coordinates in columns "pixel_x" and "pixel_y".
59
+ The index corresponds to spot IDs. Example columns: ["pixel_x", "pixel_y"].
60
+ :type coord: pandas.DataFrame
61
+ :param patch_dir: Directory path where the patch images will be saved.
62
+ :type patch_dir: str
63
+ :param height: The patch's height in pixels (distance in the y-direction).
64
+ :type height: int
65
+ :param width: The patch's width in pixels (distance in the x-direction).
66
+ :type width: int
67
+ :return: None. The function saves image patches to `patch_dir` but does not return anything.
68
+ """
69
+
70
+ # Ensure the output directory exists; create it if it doesn't
71
+ if not os.path.exists(patch_dir):
72
+ os.makedirs(patch_dir)
73
+
74
+ # Extract the overall height and width of the image
75
+ yrange, xrange = img_array.shape[:2]
76
+
77
+ # Iterate through each coordinate in the DataFrame
78
+ for spot_idx in coord.index:
79
+ # Retrieve the center coordinates; note that "pixel_x" is used as the row (y) position and "pixel_y" as the column (x) position here
80
+ ycenter, xcenter = coord.loc[spot_idx, ["pixel_x", "pixel_y"]]
81
+
82
+ # Compute the top-left (x1, y1) and bottom-right (x2, y2) boundaries of the patch
83
+ x1 = round(xcenter - width / 2)
84
+ y1 = round(ycenter - height / 2)
85
+ x2 = x1 + width
86
+ y2 = y1 + height
87
+
88
+ # Check if the patch boundaries go outside the image
89
+ if x1 < 0 or y1 < 0 or x2 > xrange or y2 > yrange:
90
+ print(f"Patch {spot_idx} is out of range and will be skipped.")
91
+ continue
92
+
93
+ # Extract the patch and convert to a PIL Image; cast to uint8 if needed
94
+ patch_img = Image.fromarray(img_array[y1:y2, x1:x2].astype(np.uint8))
95
+
96
+ # Create a filename for the patch image (e.g., "0_hires.png")
97
+ patch_name = f"{spot_idx}_hires.png"
98
+ patch_path = os.path.join(patch_dir, patch_name)
99
+
100
+ # Save the patch image to disk
101
+ patch_img.save(patch_path)
102
+
103
+
104
+
105
+ def read_gct(file_path):
106
+ """
107
+ Reads a GCT file, parses its dimensions, and returns the data as a pandas DataFrame.
108
+
109
+ :param file_path: The path to the GCT file to be read.
110
+ :return: A pandas DataFrame containing the GCT data, where the first two columns represent gene names and descriptions,
111
+ and the subsequent columns contain the expression data.
112
+ """
113
+
114
+ # Open the GCT file for reading
115
+ with open(file_path, 'r') as file:
116
+ # Read and ignore the first line (GCT version line)
117
+ file.readline()
118
+
119
+ # Read the second line which contains the dimensions of the data matrix
120
+ dims = file.readline().strip().split() # Split the dimensions line by whitespace
121
+ num_rows = int(dims[0]) # Number of data rows (genes)
122
+ num_cols = int(dims[1]) # Number of data columns (samples + metadata)
123
+
124
+ # Read the data starting from the third line, using pandas for tab-delimited data
125
+ # The first two columns in GCT files are "Name" and "Description" (gene identifiers and annotations)
126
+ data = pd.read_csv(file, sep='\t', header=0, nrows=num_rows)
127
+
128
+ # Return the loaded data as a pandas DataFrame
129
+ return data
130
+
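The GCT parsing logic above can be checked against a tiny hand-written file. This sketch repeats the same readline/read_csv steps on a temporary file (the example data is invented):

```python
import os
import tempfile
import pandas as pd

gct_text = (
    "#1.2\n"            # version line (skipped)
    "2\t2\n"            # dimensions: 2 genes, 2 samples
    "Name\tDescription\tS1\tS2\n"
    "GENE1\tna\t1.0\t2.0\n"
    "GENE2\tna\t3.0\t4.0\n"
)
with tempfile.NamedTemporaryFile('w', suffix='.gct', delete=False) as fh:
    fh.write(gct_text)
    path = fh.name

with open(path) as f:
    f.readline()                                   # skip the version line
    n_rows, n_cols = map(int, f.readline().split()[:2])
    df = pd.read_csv(f, sep='\t', header=0, nrows=n_rows)
os.remove(path)
print(df.shape)  # (2, 4): Name, Description, and 2 sample columns
```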
131
+
132
+
133
+ def get_library_id(adata):
134
+ """
135
+ Retrieves the library ID from the AnnData object, assuming it contains spatial data.
136
+ The function will return the first library ID found in `adata.uns['spatial']`.
137
+
138
+ :param adata: AnnData object containing spatial information in `adata.uns['spatial']`.
139
+ :return: The first library ID found in `adata.uns['spatial']`.
140
+ :raises:
141
+ AssertionError: If 'spatial' is not present in `adata.uns`.
142
+ Logs an error if no library ID is found.
143
+ """
144
+
145
+ # Check if 'spatial' is present in adata.uns; raises an error if not found
146
+ assert 'spatial' in adata.uns, "spatial not present in adata.uns"
147
+
148
+ # Retrieve the list of library IDs (which are keys in the 'spatial' dictionary)
149
+ library_ids = adata.uns['spatial'].keys()
150
+
151
+ try:
152
+ # Attempt to return the first library ID (converting the keys object to a list)
153
+ library_id = list(library_ids)[0]
154
+ return library_id
155
+ except IndexError:
156
+ # If no library IDs exist, log an error message
157
+ logger.error('No library_id found in adata')
158
+
159
+
160
+
161
+ def get_scalefactors(adata, library_id=None):
162
+ """
163
+ Retrieves the scalefactors from the AnnData object for a given library ID. If no library ID is provided,
164
+ the function will automatically retrieve the first available library ID.
165
+
166
+ :param adata: AnnData object containing spatial data and scalefactors in `adata.uns['spatial']`.
167
+ :param library_id: The library ID for which the scalefactors are to be retrieved. If not provided, it defaults to the first available ID.
168
+ :return: A dictionary containing scalefactors for the specified library ID.
169
+ """
170
+
171
+ # If no library_id is provided, retrieve the first available library ID
172
+ if library_id is None:
173
+ library_id = get_library_id(adata)
174
+
175
+ try:
176
+ # Attempt to retrieve the scalefactors for the specified library ID
177
+ scalef = adata.uns['spatial'][library_id]['scalefactors']
178
+ return scalef
179
+ except KeyError:
180
+ # Log an error if the scalefactors or library ID is not found
181
+ logger.error('scalefactors not found in adata')
182
+
183
+
184
+
185
+ def get_spot_diameter_in_pixels(adata, library_id=None):
186
+ """
187
+ Retrieves the spot diameter in pixels from the AnnData object's scalefactors for a given library ID.
188
+ If no library ID is provided, the function will automatically retrieve the first available library ID.
189
+
190
+ :param adata: AnnData object containing spatial data and scalefactors in `adata.uns['spatial']`.
191
+ :param library_id: The library ID for which the spot diameter is to be retrieved. If not provided, defaults to the first available ID.
192
+
193
+ :return: The spot diameter in full resolution pixels, or None if not found.
194
+ """
195
+
196
+ # Get the scalefactors for the specified or default library ID
197
+ scalef = get_scalefactors(adata, library_id=library_id)
198
+
199
+ try:
200
+ # Attempt to retrieve the spot diameter in full resolution from the scalefactors
201
+ spot_diameter = scalef['spot_diameter_fullres']
202
+ return spot_diameter
203
+ except TypeError:
204
+ # Handle case where `scalef` is None or invalid (if get_scalefactors returned None)
205
+ pass
206
+ except KeyError:
207
+ # Log an error if the 'spot_diameter_fullres' key is not found in the scalefactors
208
+ logger.error('spot_diameter_fullres not found in adata')
209
+
210
+
211
+
+ def prepare_data_for_alignment(data_path, scale_type='tissue_hires_scalef'):
+     """
+     Prepares data for alignment by reading an AnnData object and preparing the high-resolution tissue image.
+
+     :param data_path: The path to the AnnData (.h5ad) file containing the Visium data.
+     :param scale_type: The type of scale factor to use (`tissue_hires_scalef` by default).
+
+     :return:
+         - ad: AnnData object containing the spatial transcriptomics data.
+         - ad_coor: Numpy array of scaled spatial coordinates (adjusted for the specified resolution).
+         - img: High-resolution tissue image, normalized to 8-bit unsigned integers.
+
+     :raises:
+         ValueError: If required data (e.g., scale factors, spatial coordinates, or images) is missing.
+     """
+
+     # Load the AnnData object from the specified file path
+     ad = sc.read_h5ad(data_path)
+
+     # Ensure the variable (gene) names are unique to avoid potential conflicts
+     ad.var_names_make_unique()
+
+     try:
+         # Retrieve the specified scale factor for spatial coordinates
+         scalef = get_scalefactors(ad)[scale_type]
+     except (KeyError, TypeError):
+         # TypeError covers the case where get_scalefactors returned None
+         raise ValueError(f"Scale factor '{scale_type}' not found in ad.uns['spatial']")
+
+     # Scale the spatial coordinates using the specified scale factor
+     try:
+         ad_coor = np.array(ad.obsm['spatial']) * scalef
+     except KeyError:
+         raise ValueError("Spatial coordinates not found in ad.obsm['spatial']")
+
+     # Retrieve the high-resolution tissue image
+     try:
+         img = ad.uns['spatial'][get_library_id(ad)]['images']['hires']
+     except KeyError:
+         raise ValueError("High-resolution image not found in ad.uns['spatial']")
+
+     # If the image values are normalized to [0, 1], convert to 8-bit format for compatibility
+     if img.max() < 1.1:
+         img = (img * 255).astype('uint8')
+
+     return ad, ad_coor, img
+
+
+
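The [0, 1] → uint8 conversion at the end of `prepare_data_for_alignment` can be checked in isolation. A minimal NumPy sketch with a made-up 2×2 image; the `max() < 1.1` heuristic is the same one the function uses:

```python
import numpy as np

# A small float image with values in [0, 1], as hires tissue images are often stored
img = np.array([[0.0, 0.5], [0.25, 1.0]], dtype=np.float32)

# Same heuristic as prepare_data_for_alignment: treat max < 1.1 as normalized
if img.max() < 1.1:
    img = (img * 255).astype('uint8')

print(img.dtype)   # uint8
print(img.max())   # 255
```

Note that `astype('uint8')` truncates rather than rounds, so 0.5 maps to 127, not 128.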
+ def load_data_for_annotation(st_data_path, json_path, in_tissue=True):
+     """
+     Loads spatial transcriptomics (ST) data from an .h5ad file and prepares it for annotation.
+
+     :param st_data_path: Path to the .h5ad file containing the spatial transcriptomics data.
+     :param json_path: Path to the JSON file containing the region of interest (ROI) polygon.
+     :param in_tissue: Boolean flag to filter the data to include only spots that are in tissue. Default is True.
+
+     :return:
+         - st_ad: AnnData object containing the spatial transcriptomics data, with spatial coordinates in `obs`.
+         - library_id: The library ID associated with the spatial data.
+         - roi_polygon: Region of interest polygon loaded from a JSON file for further annotation or analysis.
+     """
+
+     # Load the spatial transcriptomics data into an AnnData object
+     st_ad = sc.read_h5ad(st_data_path)
+
+     # Optionally filter the data to include only spots that are within the tissue
+     if in_tissue:
+         st_ad = st_ad[st_ad.obs['in_tissue'] == 1]
+
+     # Initialize pixel coordinates for spatial information
+     st_ad.obs[["pixel_y", "pixel_x"]] = None  # Ensure the columns exist
+     st_ad.obs[["pixel_y", "pixel_x"]] = st_ad.obsm['spatial']  # Copy spatial coordinates into obs
+
+     # Retrieve the library ID associated with the spatial data
+     library_id = get_library_id(st_ad)
+
+     # Load the region of interest (ROI) polygon from a JSON file
+     with open(json_path) as f:
+         roi_polygon = json.load(f)
+
+     return st_ad, library_id, roi_polygon
+
+
+
+ def read_polygons(file_path, slide_id):
+     """
+     Reads polygon data from a JSON file for a specific slide ID, extracting coordinates, colors, and thickness.
+
+     :param file_path: Path to the JSON file containing polygon configurations.
+     :param slide_id: Identifier for the specific slide whose polygon data is to be extracted.
+     :return:
+         - polygons: A list of numpy arrays, where each array contains the coordinates of a polygon.
+         - polygon_colors: A list of color values corresponding to each polygon.
+         - polygon_thickness: A list of thickness values for each polygon's border.
+     """
+
+     # Open the JSON file and load the polygon configurations into a Python dictionary
+     with open(file_path, 'r') as f:
+         polygons_configs = json.load(f)
+
+     # Check if the given slide_id exists in the polygon configurations
+     if slide_id not in polygons_configs:
+         return None, None, None  # If slide_id is not found, return None for all outputs
+
+     # Extract the polygon coordinates, colors, and thicknesses for the given slide_id
+     polygons = [np.array(poly['coords']) for poly in polygons_configs[slide_id]]  # Convert coordinates to numpy arrays
+     polygon_colors = [poly['color'] for poly in polygons_configs[slide_id]]  # Extract the color for each polygon
+     polygon_thickness = [poly['thickness'] for poly in polygons_configs[slide_id]]  # Extract the thickness for each polygon
+
+     # Return the polygons, their colors, and their thicknesses
+     return polygons, polygon_colors, polygon_thickness
+
+
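The JSON layout `read_polygons` expects can be exercised end to end. A minimal sketch with a made-up slide ID and polygon values, reproducing the same extraction logic:

```python
import json
import tempfile

import numpy as np

# Build a minimal polygon-config file in the expected layout:
# {slide_id: [{"coords": [...], "color": ..., "thickness": ...}, ...]}
config = {
    "slide_A": [
        {"coords": [[0, 0], [10, 0], [10, 10]], "color": [255, 0, 0], "thickness": 2}
    ]
}

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(config, f)
    path = f.name

with open(path) as f:
    polygons_configs = json.load(f)

# Same extraction logic as read_polygons
polygons = [np.array(p["coords"]) for p in polygons_configs["slide_A"]]
colors = [p["color"] for p in polygons_configs["slide_A"]]

print(polygons[0].shape)  # (3, 2)
```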
src/loki/requirements.txt ADDED
@@ -0,0 +1,14 @@
+ anndata==0.10.9
+ matplotlib==3.9.2
+ numpy==1.25.0
+ pandas==2.2.3
+ opencv-python==4.10.0.84
+ pycpd==2.0.0
+ torch==2.3.1
+ tangram-sc==1.0.4
+ tqdm==4.66.5
+ torchvision==0.18.1
+ open_clip_torch==2.26.1
+ pillow==10.4.0
+ ipykernel==6.29.5
+
src/loki/retrieve.py ADDED
@@ -0,0 +1,28 @@
+ import torch
+
+
+ def retrieve_st_by_image(image_embeddings, all_text_embeddings, dataframe, k=3):
+     """
+     Retrieves the top-k most similar ST samples based on the similarity between ST embeddings and image embeddings.
+
+     :param image_embeddings: A numpy array or torch tensor containing image embeddings (shape: [1, embedding_dim]).
+     :param all_text_embeddings: A numpy array or torch tensor containing ST embeddings (shape: [n_samples, embedding_dim]).
+     :param dataframe: A pandas DataFrame containing information about the ST samples, specifically the image indices in the 'img_idx' column.
+     :param k: The number of top similar samples to retrieve. Default is 3.
+     :return: A list of the filenames or indices corresponding to the top-k similar samples.
+     """
+
+     # Compute the dot product (similarity) between the image embeddings and all ST embeddings
+     dot_similarity = image_embeddings @ all_text_embeddings.T
+
+     # Retrieve the top-k most similar samples by similarity score (dot product)
+     values, indices = torch.topk(dot_similarity.squeeze(0), k)
+
+     # Extract the image filenames or indices from the DataFrame based on the top-k matches
+     image_filenames = dataframe['img_idx'].values
+     matches = [image_filenames[idx] for idx in indices]
+
+     return matches
+
+
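Because the embeddings are L2-normalized, the dot product above equals cosine similarity. The retrieval logic can be sketched without the model, using a NumPy stand-in for `torch.topk` and made-up embeddings:

```python
import numpy as np

rng = np.random.default_rng(0)

# One normalized query embedding and five normalized "ST" embeddings (toy data)
query = rng.normal(size=(1, 8))
query /= np.linalg.norm(query)
bank = rng.normal(size=(5, 8))
bank /= np.linalg.norm(bank, axis=1, keepdims=True)

# Dot product of unit vectors == cosine similarity
sims = (query @ bank.T).squeeze(0)

# Top-k indices, highest similarity first (NumPy equivalent of torch.topk)
k = 3
top_idx = np.argsort(sims)[::-1][:k]

print(len(top_idx))  # 3
```

The `top_idx` array plays the role of `indices` in `retrieve_st_by_image`, which then looks up the corresponding `img_idx` values.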
src/loki/utils.py ADDED
@@ -0,0 +1,278 @@
+ import os
+ import numpy as np
+ import pandas as pd
+ import torch
+ import torch.nn.functional as F
+ from PIL import Image
+ import json
+ import cv2
+ from sklearn.decomposition import PCA
+ from open_clip import create_model_from_pretrained, get_tokenizer
+
+
+ def load_model(model_path, device):
+     """
+     Loads a pretrained OmiCLIP model, along with its preprocessing function and tokenizer,
+     using the specified model checkpoint.
+
+     :param model_path: File path to the pretrained model checkpoint. This is passed to
+         `create_model_from_pretrained` as the `pretrained` argument.
+     :type model_path: str
+     :param device: The device on which to load the model (e.g., 'cpu' or 'cuda').
+     :type device: str or torch.device
+     :return: A tuple `(model, preprocess, tokenizer)` where:
+         - model: The loaded OmiCLIP model.
+         - preprocess: A function or transform that preprocesses input data for the model.
+         - tokenizer: A tokenizer appropriate for textual input to the model.
+     :rtype: (nn.Module, callable, callable)
+     """
+     # Create the model and its preprocessing transform from the specified checkpoint
+     model, preprocess = create_model_from_pretrained(
+         "coca_ViT-L-14", device=device, pretrained=model_path
+     )
+
+     # Retrieve a tokenizer compatible with the "coca_ViT-L-14" architecture
+     tokenizer = get_tokenizer('coca_ViT-L-14')
+
+     return model, preprocess, tokenizer
+
+
+
+ def encode_image(model, preprocess, image):
+     """
+     Encodes an image into a normalized feature embedding using the specified model and preprocessing function.
+
+     :param model: A model object that provides an `encode_image` method.
+     :type model: torch.nn.Module
+     :param preprocess: A preprocessing function that transforms the input image into a tensor
+         suitable for the model. Typically something returning a PyTorch tensor.
+     :type preprocess: callable
+     :param image: The input image (PIL Image, NumPy array, or other format supported by `preprocess`).
+     :type image: PIL.Image.Image or numpy.ndarray
+     :return: A single normalized image embedding as a PyTorch tensor of shape (1, embedding_dim).
+     :rtype: torch.Tensor
+     """
+     # Preprocess the image, then stack to create a batch of size 1
+     image_input = torch.stack([preprocess(image)])
+
+     # Generate the image features without gradient tracking
+     with torch.no_grad():
+         image_features = model.encode_image(image_input)
+
+     # Normalize embeddings across the feature dimension (L2 normalization)
+     image_embeddings = F.normalize(image_features, p=2, dim=-1)
+
+     return image_embeddings
+
+
+
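The L2 normalization applied by `F.normalize(..., p=2, dim=-1)` can be sketched in NumPy with a toy vector (not real model output):

```python
import numpy as np

# A toy 1 x 4 "embedding"
features = np.array([[3.0, 0.0, 4.0, 0.0]])

# L2-normalize along the last dimension, as F.normalize(features, p=2, dim=-1) does
embeddings = features / np.linalg.norm(features, axis=-1, keepdims=True)

print(np.linalg.norm(embeddings, axis=-1))  # [1.]
```

After normalization every embedding has unit length, which is what makes the dot products in `retrieve_st_by_image` act as cosine similarities.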
+ def encode_image_patches(model, preprocess, data_dir, img_list):
+     """
+     Encodes multiple image patches into normalized feature embeddings using a specified model and preprocess function.
+
+     :param model: A model object that provides an `encode_image` method.
+     :type model: torch.nn.Module
+     :param preprocess: A preprocessing function that transforms the input image into a tensor
+         suitable for the model. Typically something returning a PyTorch tensor.
+     :type preprocess: callable
+     :param data_dir: The base directory containing image data.
+     :type data_dir: str
+     :param img_list: A list of image filenames (strings). Each filename corresponds to a patch image
+         stored in `data_dir/demo_data/patch/`.
+     :type img_list: list[str]
+     :return: A PyTorch tensor of shape (N, 1, embedding_dim), containing the normalized embeddings
+         for each image in `img_list`.
+     :rtype: torch.Tensor
+     """
+
+     # Prepare a list to hold each image's feature embedding
+     image_embeddings = []
+
+     # Loop through each image name in the provided list
+     for img_name in img_list:
+         # Build the path to the patch image and open it
+         image_path = os.path.join(data_dir, 'demo_data', 'patch', img_name)
+         image = Image.open(image_path)
+
+         # Encode the image using the model & preprocess; returns shape (1, embedding_dim)
+         image_features = encode_image(model, preprocess, image)
+
+         # Accumulate the feature embeddings in the list
+         image_embeddings.append(image_features)
+
+     # Stack the list of (1, embedding_dim) tensors into a single (N, 1, embedding_dim) tensor
+     image_embeddings = torch.stack(image_embeddings)
+
+     # Normalize all embeddings across the feature dimension (L2 normalization)
+     image_embeddings = F.normalize(image_embeddings, p=2, dim=-1)
+
+     return image_embeddings
+
+
+
+ def encode_text(model, tokenizer, text):
+     """
+     Encodes text into a normalized feature embedding using a specified model and tokenizer.
+
+     :param model: A model object that provides an `encode_text` method.
+     :type model: torch.nn.Module
+     :param tokenizer: A tokenizer function that converts the input text into a format suitable for `model.encode_text`.
+         Typically returns token IDs, attention masks, etc. as a torch.Tensor or similar structure.
+     :type tokenizer: callable
+     :param text: The input text (string or list of strings) to be encoded.
+     :type text: str or list[str]
+     :return: A PyTorch tensor of shape (batch_size, embedding_dim) containing the L2-normalized text embeddings.
+     :rtype: torch.Tensor
+     """
+
+     # Convert text to the appropriate tokenized representation
+     text_input = tokenizer(text)
+
+     # Run the model in no-grad mode (not tracking gradients, saving memory and compute)
+     with torch.no_grad():
+         text_features = model.encode_text(text_input)
+
+     # Normalize embeddings to unit length
+     text_embeddings = F.normalize(text_features, p=2, dim=-1)
+
+     return text_embeddings
+
+
+
+ def encode_text_df(model, tokenizer, df, col_name):
+     """
+     Encodes text from a specified column in a pandas DataFrame using the given model and tokenizer,
+     returning a PyTorch tensor of normalized text embeddings.
+
+     :param model: A model object that provides an `encode_text` method.
+     :type model: torch.nn.Module
+     :param tokenizer: A tokenizer function that converts the input text into a format suitable for `model.encode_text`.
+     :type tokenizer: callable
+     :param df: A pandas DataFrame from which text will be extracted.
+     :type df: pandas.DataFrame
+     :param col_name: The name of the column in `df` that contains the text to be encoded.
+     :type col_name: str
+     :return: A PyTorch tensor containing the L2-normalized text embeddings,
+         where the shape is (number_of_rows, embedding_dim).
+     :rtype: torch.Tensor
+     """
+
+     # Prepare a list to hold each row's text embedding
+     text_embeddings = []
+
+     # Loop through each index in the DataFrame
+     for idx in df.index:
+         # Retrieve text from the specified column for the current row
+         text = df.loc[idx, col_name]
+
+         # Encode the text using the provided model and tokenizer
+         text_features = encode_text(model, tokenizer, text)
+
+         # Accumulate the embedding tensor
+         text_embeddings.append(text_features)
+
+     # Concatenate the list of (1, embedding_dim) tensors into one (N, embedding_dim) tensor
+     text_embeddings = torch.cat(text_embeddings)
+
+     # Normalize embeddings to unit length across the feature dimension
+     text_embeddings = F.normalize(text_embeddings, p=2, dim=-1)
+
+     return text_embeddings
+
+
+
+ def get_pca_by_fit(tar_features, src_features):
+     """
+     Applies PCA to target features and transforms both target and source features using the fitted PCA model.
+     Combines the PCA-transformed features from both target and source datasets and returns the combined data
+     along with batch labels indicating the origin of each sample.
+
+     :param tar_features: Numpy array of target features (samples by features).
+     :param src_features: Numpy array of source features (samples by features).
+     :return:
+         - pca_comb_features: A numpy array containing PCA-transformed target and source features combined.
+         - pca_comb_features_batch: A numpy array of batch labels indicating which samples are from target (0) and source (1).
+     """
+
+     pca = PCA(n_components=3)
+
+     # Fit the PCA model on the target features (transposed so PCA treats columns as samples)
+     pca_fit_tar = pca.fit(tar_features.T)
+
+     # Transform the target and source features using the fitted PCA model
+     pca_tar = pca_fit_tar.transform(tar_features.T)  # Transform target features
+     pca_src = pca_fit_tar.transform(src_features.T)  # Transform source features using the same PCA fit
+
+     # Combine the PCA-transformed target and source features
+     pca_comb_features = np.concatenate((pca_tar, pca_src))
+
+     # Create a batch label array: 0 for target features, 1 for source features
+     pca_comb_features_batch = np.array([0] * len(pca_tar) + [1] * len(pca_src))
+
+     return pca_comb_features, pca_comb_features_batch
+
+
+
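Fitting PCA on one matrix and reusing the same fit to project a second keeps both sets in a shared low-dimensional basis, which is the point of `get_pca_by_fit`. A minimal sketch with random matrices (toy shapes, not real features; the two matrices must share their row count, since rows become PCA features after the transpose):

```python
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
tar = rng.normal(size=(10, 6))  # toy "target" matrix
src = rng.normal(size=(10, 4))  # toy "source" matrix (same number of rows)

pca = PCA(n_components=3)
fit = pca.fit(tar.T)            # fit on the target columns only

# Project both sets with the target-derived fit, then label their origin
comb = np.concatenate((fit.transform(tar.T), fit.transform(src.T)))
batch = np.array([0] * tar.shape[1] + [1] * src.shape[1])

print(comb.shape)  # (10, 3)
```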
+ def cap_quantile(weight, cap_max=None, cap_min=None):
+     """
+     Caps the values in the 'weight' array based on the specified quantile thresholds for maximum and minimum values.
+     If a quantile threshold is provided, values above or below it are replaced with the corresponding quantile value.
+
+     :param weight: Numpy array of weights to be capped.
+     :param cap_max: Quantile threshold for the maximum cap. Values above this quantile will be capped.
+         If None, no maximum capping will be applied.
+     :param cap_min: Quantile threshold for the minimum cap. Values below this quantile will be capped.
+         If None, no minimum capping will be applied.
+     :return: Numpy array with the values capped at the specified quantiles.
+     """
+
+     # If a maximum cap is specified, cap values at the cap_max quantile
+     if cap_max is not None:
+         cap_max = np.quantile(weight, cap_max)  # Get the value at the cap_max quantile
+         weight = np.minimum(weight, cap_max)    # Cap values to not exceed the maximum
+
+     # If a minimum cap is specified, cap values at the cap_min quantile
+     if cap_min is not None:
+         cap_min = np.quantile(weight, cap_min)  # Get the value at the cap_min quantile
+         weight = np.maximum(weight, cap_min)    # Cap values to not go below the minimum
+
+     return weight
+
+
+
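Quantile capping (winsorizing) as done by `cap_quantile` can be checked on a small array; the data and the 0.9/0.1 quantiles below are chosen for illustration:

```python
import numpy as np

weight = np.arange(11, dtype=float)  # 0.0 .. 10.0

# Cap at the 90th and 10th percentile values (9.0 and 1.0 for this array)
hi = np.quantile(weight, 0.9)
lo = np.quantile(weight, 0.1)
capped = np.maximum(np.minimum(weight, hi), lo)

print(capped.min(), capped.max())  # 1.0 9.0
```

Everything strictly between the two quantile values is left untouched; only the tails are clamped.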
+ def read_polygons(file_path, slide_id):
+     """
+     Reads polygon data from a JSON file for a specific slide ID, extracting coordinates, colors, and thickness.
+
+     :param file_path: Path to the JSON file containing polygon configurations.
+     :param slide_id: Identifier for the specific slide whose polygon data is to be extracted.
+     :return:
+         - polygons: A list of numpy arrays, where each array contains the coordinates of a polygon.
+         - polygon_colors: A list of color values corresponding to each polygon.
+         - polygon_thickness: A list of thickness values for each polygon's border.
+     """
+
+     # Open the JSON file and load the polygon configurations into a Python dictionary
+     with open(file_path, 'r') as f:
+         polygons_configs = json.load(f)
+
+     # Check if the given slide_id exists in the polygon configurations
+     if slide_id not in polygons_configs:
+         return None, None, None  # If slide_id is not found, return None for all outputs
+
+     # Extract the polygon coordinates, colors, and thicknesses for the given slide_id
+     polygons = [np.array(poly['coords']) for poly in polygons_configs[slide_id]]  # Convert coordinates to numpy arrays
+     polygon_colors = [poly['color'] for poly in polygons_configs[slide_id]]  # Extract the color for each polygon
+     polygon_thickness = [poly['thickness'] for poly in polygons_configs[slide_id]]  # Extract the thickness for each polygon
+
+     # Return the polygons, their colors, and their thicknesses
+     return polygons, polygon_colors, polygon_thickness
+
+
src/requirements.txt ADDED
@@ -0,0 +1,14 @@
+ anndata==0.10.9
+ matplotlib==3.9.2
+ numpy==1.25.0
+ pandas==2.2.3
+ opencv-python==4.10.0.84
+ pycpd==2.0.0
+ torch==2.3.1
+ tangram-sc==1.0.4
+ tqdm==4.66.5
+ torchvision==0.18.1
+ open_clip_torch==2.26.1
+ pillow==10.4.0
+ ipykernel==6.29.5
+
src/setup.py ADDED
@@ -0,0 +1,32 @@
+ import setuptools
+
+
+ setuptools.setup(
+     name="loki",  # The name of your package on PyPI
+     version="0.0.1",  # Choose your initial release version
+     author="Weiqing Chen",
+     author_email="wec4005@med.cornell.edu",
+     description="The Loki platform offers 5 core functions: tissue alignment, tissue annotation, cell type decomposition, image-transcriptomics retrieval, and ST gene expression prediction",
+     packages=setuptools.find_packages(),  # Finds the 'loki' folder automatically
+     classifiers=[
+         "Programming Language :: Python :: 3",
+         "License :: OSI Approved :: BSD License",
+         "Operating System :: OS Independent",
+     ],
+     python_requires='>=3.9',  # or the minimum version you support
+     install_requires=[
+         "anndata==0.10.9",
+         "matplotlib==3.9.2",
+         "numpy==1.25.0",
+         "pandas==2.2.3",
+         "opencv-python==4.10.0.84",
+         "pycpd==2.0.0",
+         "torch==2.3.1",
+         "tangram-sc==1.0.4",
+         "tqdm==4.66.5",
+         "torchvision==0.18.1",
+         "open_clip_torch==2.26.1",
+         "pillow==10.4.0",
+         "ipykernel==6.29.5",
+     ],
+ )