Spaces:

anway
/

Spatial-Omics-Viewer

Running

App Files Files Community

Spatial-Omics-Viewer / utils /validator.py

anway

h5ad_viewer

05fdb87 verified 3 days ago

raw

history blame contribute delete

3.95 kB

	from typing import Tuple, List
	import numpy as np
	from anndata import AnnData


	class AnnDataValidator:
	    """Validate AnnData objects for spatial visualization requirements.

	    All methods are static; the class groups the checks together with the
	    size limits they enforce.
	    """

	    MAX_OBS = 500_000  # Max number of observations (cells/spots)
	    MAX_VARS = 50_000  # Max number of variables (genes)
	    MAX_SUGGESTIONS = 5  # Max similar gene names listed in a "not found" message

	    @staticmethod
	    def validate(adata: AnnData) -> Tuple[bool, List[str]]:
	        """
	        Validate AnnData object for spatial visualization

	        Every check is run and every failure is collected, so the caller
	        receives the full list of problems rather than only the first one.

	        Args:
	            adata: AnnData object to validate

	        Returns:
	            Tuple of (is_valid, error_messages); is_valid is True exactly
	            when error_messages is empty.
	        """
	        errors: List[str] = []

	        # Spatial coordinates must exist and be an (n, 2) array.
	        if "spatial" not in adata.obsm:
	            errors.append(
	                "Missing spatial coordinates. adata.obsm['spatial'] is required."
	            )
	        else:
	            shape = tuple(np.shape(adata.obsm["spatial"]))
	            # Check the rank before touching shape[1]: a 1-D array has no
	            # second axis and must be reported as invalid, not crash the
	            # validator with an IndexError.
	            if len(shape) != 2 or shape[1] != 2:
	                errors.append(
	                    f"Spatial coordinates must be 2D (x, y). Got shape: {shape}"
	                )

	        # Check number of observations
	        if adata.n_obs > AnnDataValidator.MAX_OBS:
	            errors.append(
	                f"Too many observations: {adata.n_obs:,} (max: {AnnDataValidator.MAX_OBS:,})"
	            )

	        # Check number of variables
	        if adata.n_vars > AnnDataValidator.MAX_VARS:
	            errors.append(
	                f"Too many variables: {adata.n_vars:,} (max: {AnnDataValidator.MAX_VARS:,})"
	            )

	        # Check if data is accessible. The catch is deliberately broad: any
	        # failure while reading var_names is turned into a validation
	        # message instead of propagating to the caller.
	        try:
	            _ = adata.var_names
	        except Exception as e:
	            errors.append(f"Cannot access variable names: {e}")

	        return (not errors, errors)

	    @staticmethod
	    def validate_gene(adata: AnnData, gene_name: str) -> Tuple[bool, str]:
	        """
	        Validate if a gene exists in the dataset

	        When the gene is missing, up to MAX_SUGGESTIONS names that contain
	        gene_name as a case-insensitive substring are listed in the message.

	        Args:
	            adata: AnnData object
	            gene_name: Gene name to check

	        Returns:
	            Tuple of (exists, message)
	        """
	        if gene_name in adata.var_names:
	            return (True, f"Gene '{gene_name}' found.")

	        # Lower-case the query once instead of once per gene.
	        needle = gene_name.lower()
	        similar: List[str] = []
	        # An empty query is a substring of every name, so any "similar"
	        # genes would be arbitrary; skip the suggestions in that case.
	        if needle:
	            for candidate in adata.var_names:
	                if needle in candidate.lower():
	                    similar.append(candidate)
	                    # Stop scanning once enough suggestions are collected.
	                    if len(similar) >= AnnDataValidator.MAX_SUGGESTIONS:
	                        break

	        if similar:
	            return (
	                False,
	                f"Gene '{gene_name}' not found. Similar genes: {', '.join(similar)}",
	            )
	        return (False, f"Gene '{gene_name}' not found in dataset.")

	    @staticmethod
	    def get_gene_expression(adata: AnnData, gene_name: str) -> np.ndarray:
	        """
	        Extract gene expression for a specific gene

	        Args:
	            adata: AnnData object
	            gene_name: Gene name to extract

	        Returns:
	            Expression vector as numpy array

	        Raises:
	            ValueError: If gene not found
	        """
	        is_valid, message = AnnDataValidator.validate_gene(adata, gene_name)
	        if not is_valid:
	            raise ValueError(message)

	        # Extract gene expression (works with backed mode)
	        gene_data = adata[:, gene_name].X

	        # Convert to dense array if sparse
	        if hasattr(gene_data, "toarray"):
	            gene_data = gene_data.toarray()

	        # Normalise to a plain ndarray before flattening: flatten() on an
	        # np.matrix stays two-dimensional, and other array-likes may not
	        # expose ndim at all.
	        gene_data = np.asarray(gene_data)

	        # Flatten if needed (flatten() copies, so the result does not alias
	        # the column it was extracted from)
	        if gene_data.ndim > 1:
	            gene_data = gene_data.flatten()

	        return gene_data

	    @staticmethod
	    def get_gene_list(adata: AnnData, limit: int = 1000) -> List[str]:
	        """
	        Get list of available genes (limited for performance)

	        Args:
	            adata: AnnData object
	            limit: Maximum number of genes to return

	        Returns:
	            List of the first `limit` gene names, in var_names order
	        """
	        # Slice before converting so only `limit` names are copied into the
	        # list rather than the whole index.
	        return list(adata.var_names[:limit])