Spaces:
Running
Running
| from typing import Tuple, List | |
| import numpy as np | |
| from anndata import AnnData | |
class AnnDataValidator:
    """Validate AnnData objects for spatial visualization requirements.

    Every method is stateless and declared as a static method, so it can be
    called on the class (``AnnDataValidator.validate(adata)``) or on an
    instance with the same result.
    """

    MAX_OBS = 500_000  # Max number of observations (cells/spots)
    MAX_VARS = 50_000  # Max number of variables (genes)

    @staticmethod
    def validate(adata: AnnData) -> Tuple[bool, List[str]]:
        """
        Validate AnnData object for spatial visualization

        Args:
            adata: AnnData object to validate

        Returns:
            Tuple of (is_valid, error_messages); ``error_messages`` is empty
            when ``is_valid`` is True.
        """
        errors: List[str] = []

        # Spatial coordinates must exist and be an (n, 2) array.
        if "spatial" not in adata.obsm:
            errors.append(
                "Missing spatial coordinates. adata.obsm['spatial'] is required."
            )
        else:
            # np.shape never raises on 1-D input, unlike indexing shape[1],
            # so a malformed array becomes a reported error, not a crash.
            shape = np.shape(adata.obsm["spatial"])
            if len(shape) != 2 or shape[1] != 2:
                errors.append(
                    f"Spatial coordinates must be 2D (x, y). Got shape: {shape}"
                )

        # Check number of observations
        if adata.n_obs > AnnDataValidator.MAX_OBS:
            errors.append(
                f"Too many observations: {adata.n_obs:,} (max: {AnnDataValidator.MAX_OBS:,})"
            )

        # Check number of variables
        if adata.n_vars > AnnDataValidator.MAX_VARS:
            errors.append(
                f"Too many variables: {adata.n_vars:,} (max: {AnnDataValidator.MAX_VARS:,})"
            )

        # Check if data is accessible. Any failure here is reported as a
        # validation error rather than propagated, hence the broad except.
        try:
            _ = adata.var_names
        except Exception as e:
            errors.append(f"Cannot access variable names: {e}")

        return (not errors, errors)

    @staticmethod
    def validate_gene(adata: AnnData, gene_name: str) -> Tuple[bool, str]:
        """
        Validate if a gene exists in the dataset

        When the gene is missing, up to five names containing ``gene_name``
        as a case-insensitive substring are suggested in the message.

        Args:
            adata: AnnData object
            gene_name: Gene name to check

        Returns:
            Tuple of (exists, message)
        """
        if gene_name in adata.var_names:
            return (True, f"Gene '{gene_name}' found.")

        max_suggestions = 5
        needle = gene_name.lower()  # hoisted: invariant across the scan
        similar: List[str] = []
        # An empty query is a substring of every name; suggesting the first
        # few genes in that case would be noise, so skip the scan entirely.
        if needle:
            for candidate in adata.var_names:
                candidate = str(candidate)
                if needle in candidate.lower():
                    similar.append(candidate)
                    if len(similar) == max_suggestions:
                        break  # stop early instead of scanning every gene

        if similar:
            return (
                False,
                f"Gene '{gene_name}' not found. Similar genes: {', '.join(similar)}",
            )
        return (False, f"Gene '{gene_name}' not found in dataset.")

    @staticmethod
    def get_gene_expression(adata: AnnData, gene_name: str) -> np.ndarray:
        """
        Extract gene expression for a specific gene

        Args:
            adata: AnnData object
            gene_name: Gene name to extract

        Returns:
            Expression vector as a 1-D numpy array

        Raises:
            ValueError: If gene not found
        """
        is_valid, message = AnnDataValidator.validate_gene(adata, gene_name)
        if not is_valid:
            raise ValueError(message)

        # Extract gene expression (works with backed mode)
        gene_data = adata[:, gene_name].X

        # Convert to dense array if sparse
        if hasattr(gene_data, "toarray"):
            gene_data = gene_data.toarray()

        # Normalise to a plain ndarray first: flatten() on an np.matrix
        # would otherwise stay 2-D.
        gene_data = np.asarray(gene_data)
        if gene_data.ndim != 1:
            gene_data = gene_data.flatten()
        return gene_data

    @staticmethod
    def get_gene_list(adata: AnnData, limit: int = 1000) -> List[str]:
        """
        Get list of available genes (limited for performance)

        Args:
            adata: AnnData object
            limit: Maximum number of genes to return

        Returns:
            List of gene names
        """
        # Slice before converting so only ``limit`` names are materialised.
        return list(adata.var_names[:limit])