"""Track loaded h5ad datasets and which one is currently active."""

import datetime
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

try:
    from anndata import AnnData
except ImportError:
    # AnnData is only used in annotations in this module; the code itself
    # just reads ``n_obs`` / ``n_vars`` from whatever object it is handed.
    # Falling back keeps the bookkeeping importable without anndata.
    AnnData = Any  # type: ignore[assignment,misc]


@dataclass
class DataSource:
    """Represents a loaded h5ad data source"""

    id: str  # Unique identifier
    name: str  # Display name
    source_type: str  # 'demo', 'url', 'upload'
    source_path: str  # Original source (URL, file path, etc.)
    adata: Optional[AnnData]  # The loaded AnnData object (None until loaded)
    loaded_at: Optional[datetime.datetime]  # When it was loaded (None until loaded)
    n_obs: int = 0  # Number of observations
    n_vars: int = 0  # Number of variables

    def get_display_name(self) -> str:
        """Get formatted display name with metadata.

        Returns:
            The name followed by cell/gene counts when ``adata`` is set,
            otherwise the name followed by ``(Not loaded)``.
        """
        if self.adata is not None:
            return f"{self.name} ({self.n_obs:,} cells, {self.n_vars:,} genes)"
        return f"{self.name} (Not loaded)"

    def get_info(self) -> str:
        """Get detailed information string.

        Returns:
            A multi-line summary (name, source type, counts, load time).
            The load time reads ``Not loaded`` when ``loaded_at`` is None.
        """
        # ``loaded_at`` stays None for sources registered without data, so it
        # must not be formatted unconditionally.
        if self.loaded_at is not None:
            loaded = self.loaded_at.strftime('%Y-%m-%d %H:%M:%S')
        else:
            loaded = "Not loaded"
        return (
            f"Dataset: {self.name}\n"
            f"Source: {self.source_type}\n"
            f"Cells/Spots: {self.n_obs:,}\n"
            f"Genes: {self.n_vars:,}\n"
            f"Loaded: {loaded}"
        )


class DataSourceManager:
    """
    Manage multiple loaded h5ad datasets

    This class handles:
    - Tracking all loaded datasets
    - Switching between datasets
    - Providing dataset metadata
    """

    def __init__(self) -> None:
        self.sources: Dict[str, DataSource] = {}
        self.current_id: Optional[str] = None
        # Monotonic counter used to build "ds_<n>" identifiers.
        self._id_counter = 0

    def add_source(
        self,
        name: str,
        source_type: str,
        source_path: str,
        adata: Optional[AnnData] = None
    ) -> str:
        """
        Add a new data source

        If a source with the same ``source_path`` is already registered, no
        new entry is created. The existing entry is filled in with ``adata``
        only when it had no data yet; otherwise it is left untouched.

        Args:
            name: Display name for the dataset
            source_type: Type of source ('demo', 'url', 'upload')
            source_path: Original source location
            adata: Optional loaded AnnData object

        Returns:
            Unique ID of the added (or already existing) source
        """
        # Check if already exists by source_path to avoid duplicates
        for existing_id, existing in self.sources.items():
            if existing.source_path != source_path:
                continue
            if adata is not None and existing.adata is None:
                # Update existing source with loaded adata
                existing.adata = adata
                existing.loaded_at = datetime.datetime.now()
                existing.n_obs = adata.n_obs
                existing.n_vars = adata.n_vars
            return existing_id

        # Generate unique ID
        source_id = f"ds_{self._id_counter}"
        self._id_counter += 1

        is_loaded = adata is not None
        self.sources[source_id] = DataSource(
            id=source_id,
            name=name,
            source_type=source_type,
            source_path=source_path,
            adata=adata,
            loaded_at=datetime.datetime.now() if is_loaded else None,
            n_obs=adata.n_obs if is_loaded else 0,
            n_vars=adata.n_vars if is_loaded else 0,
        )

        # Set as current if it's the first one
        if self.current_id is None:
            self.current_id = source_id

        return source_id

    def get_source(self, source_id: str) -> Optional[DataSource]:
        """Get a data source by ID, or None if the ID is unknown"""
        return self.sources.get(source_id)

    def get_current_source(self) -> Optional[DataSource]:
        """Get the currently active data source, or None if there is none"""
        if self.current_id is None:
            return None
        return self.sources.get(self.current_id)

    def set_current(self, source_id: str) -> bool:
        """
        Set the current active data source

        Args:
            source_id: ID of the source to activate

        Returns:
            True if successful, False if source not found
        """
        if source_id not in self.sources:
            return False
        self.current_id = source_id
        return True

    def get_all_sources(self) -> List[DataSource]:
        """Get list of all registered data sources"""
        return list(self.sources.values())

    def get_source_choices(self) -> List[Tuple[str, str]]:
        """
        Get list of sources for dropdown/radio selection

        Returns:
            List of (display_name, source_id) tuples
        """
        return [
            (source.get_display_name(), source.id)
            for source in self.sources.values()
        ]

    def get_source_names(self) -> List[str]:
        """Get list of source display names"""
        return [source.name for source in self.sources.values()]

    def remove_source(self, source_id: str) -> bool:
        """
        Remove a data source

        When the removed source was the current one, the first remaining
        source (in insertion order) becomes current, or None if none remain.

        Args:
            source_id: ID of source to remove

        Returns:
            True if removed, False if not found
        """
        if source_id not in self.sources:
            return False

        del self.sources[source_id]

        # Update current_id if we removed the current source
        if self.current_id == source_id:
            self.current_id = next(iter(self.sources), None)
        return True

    def has_sources(self) -> bool:
        """Check if any sources are registered"""
        return bool(self.sources)

    def count_sources(self) -> int:
        """Get number of registered sources"""
        return len(self.sources)

    def clear_all(self) -> None:
        """Remove all data sources"""
        self.sources.clear()
        self.current_id = None
        # NOTE: the counter restarts, so IDs handed out before the clear
        # (e.g. "ds_0") will be issued again for new sources.
        self._id_counter = 0