Loki / src /loki /decompose.py

Upload 42 files

1e315b6 verified 9 months ago

6.59 kB

	import pandas as pd
	import tangram as tg
	import numpy as np
	import torch
	import anndata
	from sklearn.decomposition import PCA
	from sklearn.neighbors import NearestNeighbors



	def generate_feature_ad(ad_expr, feature_path, sc=False):
	    """
	    Builds an AnnData object holding OmiCLIP text or image embeddings.

	    The CSV at ``feature_path`` is read with its first column as the row index.
	    Its columns are selected by the observation names in ``ad_expr.obs.index``
	    (in that order) and the table is transposed so that cells/spots become rows.

	    :param ad_expr: AnnData object supplying the observation names and metadata.
	    :param feature_path: Path to the CSV file containing the features to be loaded.
	    :param sc: If True, copy all of ``ad_expr.obs`` (single-cell data). If False
	        (default, spatial data), copy only ``obs['cell_num']``, ``uns['spatial']``
	        and ``obsm['spatial']``.
	    :return: A new AnnData object with the loaded features and the metadata copied from ad_expr.
	    """
	    obs_names = ad_expr.obs.index

	    # Select the feature columns once, in the order of the observation names.
	    # (Selecting a second time is redundant and would duplicate columns if the
	    # observation names were not unique.)
	    features = pd.read_csv(feature_path, index_col=0)[obs_names]

	    # Transpose so that cells/spots are rows of the new AnnData object.
	    feature_ad = anndata.AnnData(features.T)

	    if sc:
	        # Single-cell data: carry over the complete observation table.
	        feature_ad.obs = ad_expr.obs.copy()
	    else:
	        # Spatial data: carry over the per-spot cell count and the spatial
	        # metadata/coordinates only.
	        feature_ad.obs['cell_num'] = ad_expr.obs['cell_num'].copy()
	        feature_ad.uns['spatial'] = ad_expr.uns['spatial'].copy()
	        feature_ad.obsm['spatial'] = ad_expr.obsm['spatial'].copy()

	    return feature_ad



	def normalize_percentile(df, cols, min_percentile=5, max_percentile=95):
	    """
	    Clips and normalizes the specified columns of a DataFrame based on percentile thresholds,
	    transforming their values to the [0, 1] range.

	    The DataFrame is modified in place and also returned. A column whose lower and
	    upper percentile values coincide (e.g. a constant column) carries no contrast
	    and is set to 0.0 instead of being divided by zero.

	    :param df: A pandas DataFrame containing the columns to normalize.
	    :type df: pandas.DataFrame
	    :param cols: A list of column names in `df` that should be normalized.
	    :type cols: list[str]
	    :param min_percentile: The lower percentile used for clipping (defaults to 5).
	    :type min_percentile: float
	    :param max_percentile: The upper percentile used for clipping (defaults to 95).
	    :type max_percentile: float
	    :return: The same DataFrame with specified columns clipped and normalized.
	    :rtype: pandas.DataFrame
	    """
	    for col in cols:
	        # Percentile thresholds are computed on the raw (unclipped) values.
	        min_val = np.percentile(df[col], min_percentile)
	        max_val = np.percentile(df[col], max_percentile)
	        value_range = max_val - min_val

	        if value_range == 0:
	            # Degenerate range: every clipped value equals min_val, so the
	            # min-max formula would be 0/0 (NaN). Map the column to 0 instead.
	            df[col] = 0.0
	            continue

	        # Clip to the percentile window, then min-max scale into [0, 1].
	        clipped = np.clip(df[col], min_val, max_val)
	        df[col] = (clipped - min_val) / value_range

	    return df



	def cell_type_decompose(sc_ad, st_ad, cell_type_col='cell_type', NMS_mode=False, major_types=None, min_percentile=5, max_percentile=95):
	    """
	    Performs cell type decomposition on spatial data (ST or image) with single-cell data.

	    :param sc_ad: AnnData object containing single-cell meta data.
	    :param st_ad: AnnData object containing spatial data (ST or image) meta data.
	    :param cell_type_col: The column name in `sc_ad.obs` that contains cell type annotations. Default is 'cell_type'.
	    :param NMS_mode: Boolean flag to apply Non-Maximum Suppression (NMS) mode. Default is False.
	    :param major_types: Major cell types used for NMS mode. Required when `NMS_mode` is True. Default is None.
	    :param min_percentile: The lower percentile used for clipping (defaults to 5).
	    :param max_percentile: The upper percentile used for clipping (defaults to 95).
	    :return: The spatial AnnData object with projected cell type annotations.
	    :raises ValueError: If `NMS_mode` is True but `major_types` is None.
	    """
	    # Validate before the (expensive) mapping so a missing argument fails fast
	    # with a clear message rather than a "NoneType is not iterable" error later.
	    if NMS_mode and major_types is None:
	        raise ValueError("major_types must be provided when NMS_mode is True")

	    # Preprocess both AnnData objects for Tangram (genes=None: no explicit gene list is passed).
	    tg.pp_adatas(sc_ad, st_ad, genes=None)

	    # Map single-cell data to spatial data using Tangram's "map_cells_to_space" function
	    ad_map = tg.map_cells_to_space(
	        sc_ad, st_ad,
	        mode="clusters",              # Map at the cluster (cell type) level
	        cluster_label=cell_type_col,  # Column in `sc_ad.obs` representing cell type
	        device='cpu',                 # Run on CPU
	        scale=False,                  # Don't scale data
	        density_prior='uniform',      # Uniform density prior across spots
	        random_state=10,              # Fixed seed for reproducibility
	        verbose=False,                # Disable verbose output for cleaner logging
	    )

	    # Project cell type annotations from the single-cell data to the spatial data;
	    # the predictions are read back from st_ad.obsm['tangram_ct_pred'] below.
	    tg.project_cell_annotations(ad_map, st_ad, annotation=cell_type_col)

	    if NMS_mode:
	        # Clip/normalize the major-type columns of the prediction table in place.
	        # NOTE(review): this replaces st_ad.obs wholesale with the prediction
	        # table, so pre-existing obs columns are dropped - confirm this is intended.
	        st_ad.obs = normalize_percentile(st_ad.obsm['tangram_ct_pred'], major_types, min_percentile, max_percentile)

	        major_pred = st_ad.obsm['tangram_ct_pred'][major_types].copy()
	        # Retain the max value in each row and set the rest to 0
	        row_max = major_pred.max(axis=1)
	        st_ad.obs[major_types] = major_pred.where(major_pred.eq(row_max, axis=0), other=0)

	    return st_ad  # Return the spatial AnnData object with the projected annotations



	def assign_cells_to_spots(cell_locs, spot_locs, patch_size=16):
	    """
	    Links every cell to the spots lying within half a patch size of it.

	    :param cell_locs: Numpy array of shape (n_cells, 2) with the x, y coordinates of the cells.
	    :param spot_locs: Numpy array of shape (n_spots, 2) with the x, y coordinates of the spots.
	    :param patch_size: The diameter of the spot patch; half of it is used as the search radius.
	    :return: A sparse connectivity matrix with one row per cell and one column per spot.
	        An entry is 1 if the cell lies within the radius of that spot, 0 otherwise.
	    """
	    search_radius = patch_size * 0.5

	    # Index the spot positions, then query them with the cell positions:
	    # rows of the resulting graph are the queries (cells), columns the fitted points (spots).
	    spot_index = NearestNeighbors(radius=search_radius)
	    spot_index.fit(spot_locs)

	    return spot_index.radius_neighbors_graph(cell_locs, mode='connectivity')