Loki / src /build /lib /loki /annotate.py

Upload 42 files

1e315b6 verified 9 months ago

4.2 kB

	import numpy as np
	import torch
	from torch.nn import functional as F
	import os
	import scanpy as sc
	import json
	import cv2



	def annotate_with_bulk(img_features, bulk_features, normalize=True, T=1, tensor=False):
	    """
	    Annotates tissue image with similarity scores between image features and bulk RNA-seq features.

	    The two modes do not compute the same quantity:

	    * ``tensor=True``: cosine similarity between every row of ``img_features`` and
	      ``bulk_features``, optionally divided by ``sqrt(bulk_features.shape[0])``,
	      divided by ``T`` and turned into a probability distribution with softmax.
	    * ``tensor=False``: ``np.dot(img_features.T, bulk_features)``, followed by a softmax
	      and a min-max rescale to [0, 1]. ``normalize`` and ``T`` are ignored in this mode.

	    :param img_features: Feature matrix representing histopathology image features.
	        Tensor mode compares along dim 1, i.e. one image per row. NumPy mode transposes
	        the matrix before the dot product (presumably one image per column; confirm
	        against callers).
	    :param bulk_features: Feature vector representing bulk RNA-seq features.
	    :param normalize: Tensor mode only. Whether to divide the cosine similarity by the
	        square root of the feature dimension, default=True.
	    :param T: Tensor mode only. Temperature parameter to control the sharpness of the
	        softmax distribution. Higher values result in a smoother distribution.
	    :param tensor: Feature format in torch tensor or not, default=False.

	    :return: Tensor mode: a tensor of shape [1, n] that sums to 1. NumPy mode: an array of
	        scores rescaled to [0, 1]; all zeros when every score is identical.
	    """

	    if tensor:
	        # Cosine similarity of each image row against the bulk vector. Shape: [n]
	        similarity = torch.nn.functional.cosine_similarity(
	            img_features, bulk_features.unsqueeze(0), dim=1, eps=1e-6
	        )

	        if normalize:
	            # A plain Python scalar (instead of a freshly built CPU tensor) keeps this
	            # division valid whatever device the features live on.
	            similarity = similarity / (bulk_features.shape[0] ** 0.5)

	        # Temperature scaling, then softmax over the n images. Shape: [1, n]
	        similarity = similarity.unsqueeze(0) / T
	        return torch.nn.functional.softmax(similarity, dim=-1)

	    # Non-tensor mode: raw dot products.
	    scores = np.dot(img_features.T, bulk_features)

	    # Softmax, shifted by the maximum for numerical stability.
	    exp_scores = np.exp(scores - np.max(scores))
	    probabilities = exp_scores / np.sum(exp_scores)

	    # Min-max rescale to [0, 1] for interpretation.
	    lowest = np.min(probabilities)
	    spread = np.max(probabilities) - lowest
	    if spread == 0:
	        # Every score is identical (e.g. a single image): 0/0 would produce NaN.
	        return np.zeros_like(probabilities)
	    return (probabilities - lowest) / spread



	def annotate_with_marker_genes(classes, image_embeddings, all_text_embeddings):
	    """
	    Annotates tissue image with similarity scores between image features and marker gene features.

	    :param classes: A list or array of tissue type labels.
	    :param image_embeddings: A numpy array or torch tensor of image embeddings (shape: [n_images, embedding_dim]).
	    :param all_text_embeddings: A numpy array or torch tensor of text embeddings of the marker genes
	        (shape: [n_classes, embedding_dim]).

	    :return:
	        - dot_similarity: The matrix of dot product similarities between image embeddings and text embeddings.
	        - pred_class: The label with the highest similarity. A single label when there is one
	          image; a list with one label per image when several images are passed.
	    """

	    # Dot product similarity between image and text embeddings.
	    # This results in a similarity matrix of shape [n_images, n_classes].
	    dot_similarity = image_embeddings @ all_text_embeddings.T

	    # Take the argmax over the class axis only. An axis-less argmax flattens the matrix,
	    # so with more than one image the flat index no longer identifies a class.
	    best_per_image = dot_similarity.argmax(-1).reshape(-1).tolist()

	    if len(best_per_image) == 1:
	        # Single image: keep returning one label rather than a one-element list.
	        pred_class = classes[best_per_image[0]]
	    else:
	        pred_class = [classes[class_index] for class_index in best_per_image]

	    return dot_similarity, pred_class



	def load_image_annotation(image_path):
	    """
	    Loads an image with annotation.

	    :param image_path: The file path to the image.

	    :return: The image as a uint8 array with its channels in RGB order (OpenCV's native
	        BGR order is swapped before returning).
	    :raises cv2.error: If the file cannot be read as an image.
	    """

	    # cv2.imread signals failure by returning None instead of raising.
	    image = cv2.imread(image_path)
	    if image is None:
	        # Raised as cv2.error, the same type the failed colour conversion used to raise,
	        # so existing handlers keep working while the message now names the file.
	        raise cv2.error(f"Could not read image file: {image_path}")

	    # OpenCV loads images as BGR; swap the first and third channels to get RGB.
	    # COLOR_BGR2RGB performs the same swap as COLOR_RGB2BGR and names the direction correctly.
	    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

	    # Ensure the image is of type uint8 for downstream image processing libraries.
	    return image.astype(np.uint8)