import numpy as np
import torch
from torch.nn import functional as F
import os
import scanpy as sc
import json
import cv2


def annotate_with_bulk(img_features, bulk_features, normalize=True, T=1, tensor=False):
    """
    Annotates tissue image with similarity scores between image features and bulk RNA-seq features.

    :param img_features: Feature matrix representing histopathology image features.
        The tensor path reduces over ``dim=1`` (one row per image feature vector); the
        NumPy path computes ``img_features.T @ bulk_features`` (features along axis 0).
    :param bulk_features: Feature vector representing bulk RNA-seq features.
    :param normalize: Whether to divide the cosine similarity by sqrt(len(bulk_features)),
        default=True. Only used when ``tensor=True``.
    :param T: Temperature parameter to control the sharpness of the softmax distribution.
        Higher values result in a smoother distribution. Only used when ``tensor=True``.
    :param tensor: Feature format in torch tensor or not, default=False.
    :return: With ``tensor=True``, a tensor holding the softmax distribution over the
        similarities with a leading axis of size 1. Otherwise a NumPy array of
        softmax scores min-max rescaled to [0, 1]; if every score is identical
        (e.g. a single spot) the rescaled result is all zeros instead of NaN.
    """
    if tensor:
        # Cosine similarity between every image feature row and the bulk vector.
        cosine_similarity = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
        similarity = cosine_similarity(img_features, bulk_features.unsqueeze(0))

        # Optional scaling by sqrt(feature dimension).
        if normalize:
            # Build the factor on the same device as the scores: a shape-[1] CPU
            # tensor cannot be combined with CUDA tensors.
            normalization_factor = torch.sqrt(
                torch.tensor(
                    [bulk_features.shape[0]],
                    dtype=torch.float,
                    device=similarity.device,
                )
            )
            similarity = similarity / normalization_factor

        # Add a leading axis and apply temperature scaling before the softmax.
        similarity = similarity.unsqueeze(0)
        similarity = similarity / T

        # Convert similarity scores to a probability distribution.
        similarity = torch.nn.functional.softmax(similarity, dim=-1)
    else:
        # Dot-product similarity for the NumPy path.
        similarity = np.dot(img_features.T, bulk_features)

        # Softmax with the maximum subtracted first for numerical stability.
        max_similarity = np.max(similarity)
        exp_shifted = np.exp(similarity - max_similarity)
        similarity = exp_shifted / np.sum(exp_shifted)

        # Min-max rescale to [0, 1] for interpretation.
        lowest = np.min(similarity)
        value_range = np.max(similarity) - lowest
        if value_range == 0:
            # All scores are equal: there is no contrast to rescale, and dividing
            # by zero would only produce NaN.
            similarity = np.zeros_like(similarity)
        else:
            similarity = (similarity - lowest) / value_range

    return similarity


def annotate_with_marker_genes(classes, image_embeddings, all_text_embeddings):
    """
    Annotates tissue image with similarity scores between image features and marker gene features.

    :param classes: A list or array of tissue type labels.
    :param image_embeddings: A numpy array or torch tensor of image embeddings (shape: [n_images, embedding_dim]).
    :param all_text_embeddings: A numpy array or torch tensor of text embeddings of the marker genes (shape: [n_classes, embedding_dim]).
    :return:
        - dot_similarity: The matrix of dot product similarities between image embeddings and text embeddings.
        - pred_class: The predicted tissue type based on the highest similarity score. A single
          label when the similarity result has one row (or fewer than two dimensions); a list
          with one label per image when several images are given.
    """
    # Dot-product similarity between image and text embeddings.
    dot_similarity = image_embeddings @ all_text_embeddings.T

    if dot_similarity.ndim == 2 and dot_similarity.shape[0] > 1:
        # Several images: take the best class per row. A flattened argmax would
        # return row * n_classes + col, which is not a valid index into `classes`.
        best_per_image = dot_similarity.argmax(-1)
        pred_class = [classes[int(index)] for index in best_per_image]
    else:
        # Single image (or a non-matrix result): the flattened argmax is already
        # the class index.
        pred_class = classes[int(dot_similarity.argmax())]

    return dot_similarity, pred_class


def load_image_annotation(image_path):
    """
    Loads an image with annotation.

    :param image_path: The file path to the image.
    :return: The loaded image with its channel order swapped from OpenCV's BGR to RGB,
        as a uint8 array.

    Note: ``cv2.imread`` returns ``None`` when the file cannot be read, in which case the
    colour conversion below raises.
    """
    # OpenCV loads images in BGR channel order.
    image = cv2.imread(image_path)

    # Swap the first and third channels so the result is RGB. COLOR_BGR2RGB and
    # COLOR_RGB2BGR are the same swap; this name matches the actual direction.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Ensure uint8 for downstream image-processing libraries.
    image = image.astype(np.uint8)

    return image