Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| """ | |
| HeartMAP Web Interface - Gradio app for Hugging Face Spaces deployment | |
| Comprehensive chamber-specific cardiac analysis platform | |
| """ | |
| import gradio as gr | |
| import tempfile | |
| import sys | |
| import shutil | |
| import pandas as pd | |
| import numpy as np | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| from pathlib import Path | |
| from typing import Tuple, List, Dict | |
| # Add src to path | |
| sys.path.insert(0, 'src') | |
# Optional heavy dependencies: the single-cell stack (scanpy/anndata) and the
# project's own `heartmap` package. Both availability flags are always bound
# after this block so later code can branch on them without risking NameError.
try:
    import scanpy as sc
    import anndata as ad
    from heartmap import Config
    from heartmap.pipelines import (
        BasicPipeline,
        ComprehensivePipeline,
        MultiChamberPipeline,
        AdvancedCommunicationPipeline,
    )
    from heartmap.data.lr_database import get_ligand_receptor_pairs, LigandReceptorDatabase
    LR_DATABASE_AVAILABLE = True
    HEARTMAP_AVAILABLE = True
except ImportError as e:
    print(f"ImportError: {e}")
    import traceback
    traceback.print_exc()
    HEARTMAP_AVAILABLE = False
    # Previously left unbound on this path, so any later
    # `if LR_DATABASE_AVAILABLE:` check raised NameError instead of taking
    # the fallback branch.
    LR_DATABASE_AVAILABLE = False
    # NOTE(review): `ad` and `sc` stay unbound on this path; annotations that
    # reference `ad.AnnData` at definition time will still fail - confirm
    # whether lazy annotations are wanted.
| def load_and_validate_data(uploaded_file) -> Tuple[ad.AnnData, str]: | |
| """Load and validate uploaded single-cell data in various formats with compatibility fallbacks | |
| Supported formats: | |
| - AnnData: .h5ad, .h5mu, .zarr | |
| - 10X: .h5, .mtx (with genes/barcodes) | |
| - Seurat: .rds, .rdata (h5seurat) | |
| - Loom: .loom | |
| - Text: .csv, .tsv, .txt (expression matrices) | |
| - Archives: .tar, .tar.gz, .tgz | |
| - HDF5: .hdf5, .h5 (generic) | |
| - Parquet: .parquet | |
| - And more... | |
| """ | |
| import h5py | |
| import tarfile | |
| import tempfile | |
| import shutil | |
| from pathlib import Path | |
| # Determine file type | |
| filename = uploaded_file if isinstance(uploaded_file, str) else getattr(uploaded_file, 'name', str(uploaded_file)) | |
| file_lower = filename.lower() | |
| filepath = Path(uploaded_file) | |
| # Categorize file types | |
| is_h5ad = file_lower.endswith('.h5ad') | |
| is_h5 = file_lower.endswith('.h5') and not file_lower.endswith('.h5ad') and not file_lower.endswith('.h5mu') | |
| is_tar = file_lower.endswith(('.tar', '.tar.gz', '.tgz')) | |
| is_loom = file_lower.endswith('.loom') | |
| is_mtx = file_lower.endswith(('.mtx', '.mtx.gz')) | |
| is_text = file_lower.endswith(('.csv', '.tsv', '.txt', '.csv.gz', '.tsv.gz', '.txt.gz')) | |
| is_parquet = file_lower.endswith('.parquet') | |
| is_hdf5 = file_lower.endswith('.hdf5') | |
| is_h5mu = file_lower.endswith('.h5mu') | |
| is_zarr = file_lower.endswith('.zarr') or filepath.is_dir() | |
| is_rds = file_lower.endswith(('.rds', '.rdata')) | |
| is_h5seurat = 'seurat' in file_lower and file_lower.endswith('.h5') | |
| is_arrow = file_lower.endswith('.arrow') | |
| is_json = file_lower.endswith(('.json', '.geojson')) | |
| try: | |
| # Handle TAR archives (GEO datasets) | |
| if is_tar: | |
| print("Detected TAR archive (GEO format). Extracting files...") | |
| temp_dir = tempfile.mkdtemp() | |
| try: | |
| # Extract tar file (with filter for Python 3.12+ compatibility) | |
| with tarfile.open(uploaded_file, 'r:*') as tar: | |
| # Use data filter for Python 3.12+ to suppress warning | |
| try: | |
| tar.extractall(temp_dir, filter='data') | |
| except TypeError: | |
| # Fallback for older Python versions that don't support filter | |
| tar.extractall(temp_dir) | |
| print(f"✓ Extracted {len(tar.getmembers())} files") | |
| # Look for compatible files recursively | |
| extracted_files = list(Path(temp_dir).rglob('*')) | |
| # Debug: Show what files were found | |
| print(f" Analyzing {len(extracted_files)} extracted items...") | |
| file_extensions = {} | |
| for f in extracted_files: | |
| if f.is_file(): | |
| ext = f.suffix.lower() | |
| file_extensions[ext] = file_extensions.get(ext, 0) + 1 | |
| # Also check full name for multi-extension files | |
| if ext == '.gz': | |
| # Check what's before .gz | |
| name_lower = f.name.lower() | |
| if '.h5ad.gz' in name_lower: | |
| file_extensions['.h5ad.gz'] = file_extensions.get('.h5ad.gz', 0) + 1 | |
| elif '.h5.gz' in name_lower: | |
| file_extensions['.h5.gz'] = file_extensions.get('.h5.gz', 0) + 1 | |
| elif '.mtx.gz' in name_lower: | |
| file_extensions['.mtx.gz'] = file_extensions.get('.mtx.gz', 0) + 1 | |
| if file_extensions: | |
| print(f" File types found: {file_extensions}") | |
| # Check for files with full name patterns (including .gz) - RECURSIVELY | |
| h5ad_files = [f for f in extracted_files if f.is_file() and (f.name.lower().endswith('.h5ad') or f.name.lower().endswith('.h5ad.gz'))] | |
| h5_files = [f for f in extracted_files if f.is_file() and (f.name.lower().endswith('.h5') or f.name.lower().endswith('.h5.gz')) | |
| and not f.name.lower().endswith('.h5ad') and not f.name.lower().endswith('.h5ad.gz')] | |
| # Find matrix.mtx files and their parent directories (support nested structures) | |
| mtx_files_found = [f for f in extracted_files if f.is_file() and 'matrix.mtx' in f.name.lower()] | |
| print(f" Found {len(mtx_files_found)} matrix.mtx files") | |
| # For each matrix file, check if its directory contains the required companions | |
| valid_mtx_dirs = [] | |
| seen_dirs = set() # Avoid duplicates | |
| for mtx_file in mtx_files_found: | |
| mtx_dir = mtx_file.parent | |
| # Skip if we've already processed this directory | |
| if str(mtx_dir) in seen_dirs: | |
| continue | |
| seen_dirs.add(str(mtx_dir)) | |
| # Check for genes/features and barcodes in the same directory | |
| dir_files = [f.name.lower() for f in mtx_dir.iterdir() if f.is_file()] | |
| has_genes = any('genes.tsv' in fn or 'features.tsv' in fn for fn in dir_files) | |
| has_barcodes = any('barcodes.tsv' in fn for fn in dir_files) | |
| if has_genes and has_barcodes: | |
| valid_mtx_dirs.append(mtx_dir) | |
| rel_path = mtx_dir.relative_to(temp_dir) if mtx_dir != Path(temp_dir) else Path('.') | |
| print(f" ✓ Valid 10x directory found: {rel_path}") | |
| mtx_dirs = valid_mtx_dirs | |
| print(f" Total unique valid directories: {len(mtx_dirs)}") | |
| # If no valid directories found yet, try looking for common 10x folder structures | |
| if not mtx_dirs and not h5ad_files and not h5_files: | |
| print(" Searching for common 10x folder structures...") | |
| # Common Cell Ranger output paths | |
| common_paths = [ | |
| 'filtered_feature_bc_matrix', | |
| 'outs/filtered_feature_bc_matrix', | |
| 'filtered_gene_bc_matrices', | |
| 'raw_feature_bc_matrix', | |
| 'outs/raw_feature_bc_matrix' | |
| ] | |
| for common_path in common_paths: | |
| search_dirs = list(Path(temp_dir).rglob(common_path)) | |
| for search_dir in search_dirs: | |
| if search_dir.is_dir(): | |
| dir_files = [f.name.lower() for f in search_dir.iterdir() if f.is_file()] | |
| has_matrix = any('matrix.mtx' in fn for fn in dir_files) | |
| has_genes = any('genes.tsv' in fn or 'features.tsv' in fn for fn in dir_files) | |
| has_barcodes = any('barcodes.tsv' in fn for fn in dir_files) | |
| if has_matrix and has_genes and has_barcodes: | |
| mtx_dirs.append(search_dir) | |
| print(f" ✓ Found 10x data in: {search_dir.relative_to(temp_dir)}") | |
| if h5ad_files: | |
| print(f"✓ Found {len(h5ad_files)} .h5ad file(s). Loading first one...") | |
| h5ad_file = h5ad_files[0] | |
| # If gzipped, decompress first | |
| if h5ad_file.name.endswith('.gz'): | |
| print(f" Decompressing {h5ad_file.name}...") | |
| import gzip | |
| decompressed_path = h5ad_file.parent / h5ad_file.stem # Remove .gz | |
| with gzip.open(h5ad_file, 'rb') as f_in: | |
| with open(decompressed_path, 'wb') as f_out: | |
| f_out.write(f_in.read()) | |
| print(f" ✓ Decompressed to {decompressed_path.name}") | |
| adata = sc.read_h5ad(str(decompressed_path)) | |
| else: | |
| adata = sc.read_h5ad(str(h5ad_file)) | |
| elif h5_files: | |
| print(f"✓ Found {len(h5_files)} .h5 file(s). Loading first one...") | |
| # Try loading as 10X format first, fall back to AnnData h5ad format | |
| h5_loaded = False | |
| for h5_file in h5_files: | |
| # Decompress if gzipped | |
| file_to_load = h5_file | |
| if h5_file.name.endswith('.gz'): | |
| print(f" Decompressing {h5_file.name}...") | |
| import gzip | |
| decompressed_path = h5_file.parent / h5_file.stem # Remove .gz | |
| with gzip.open(h5_file, 'rb') as f_in: | |
| with open(decompressed_path, 'wb') as f_out: | |
| f_out.write(f_in.read()) | |
| print(f" ✓ Decompressed to {decompressed_path.name}") | |
| file_to_load = decompressed_path | |
| try: | |
| print(f" Trying to load {file_to_load.name} as 10X format...") | |
| adata = sc.read_10x_h5(str(file_to_load)) | |
| h5_loaded = True | |
| print(f" ✓ Successfully loaded as 10X format") | |
| break | |
| except Exception as e1: | |
| print(f" ⚠ Not 10X format: {str(e1)[:100]}") | |
| try: | |
| print(f" Trying to load {file_to_load.name} as AnnData format...") | |
| adata = sc.read_h5ad(str(file_to_load)) | |
| h5_loaded = True | |
| print(f" ✓ Successfully loaded as AnnData format") | |
| break | |
| except Exception as e2: | |
| print(f" ⚠ Not AnnData format: {str(e2)[:100]}") | |
| continue | |
| if not h5_loaded: | |
| print(" ⚠ No .h5 files could be loaded, trying other formats...") | |
| adata = None | |
| elif mtx_dirs: | |
| print(f"✓ Found {len(mtx_dirs)} matrix.mtx directories. Loading first valid one...") | |
| adata = None | |
| # Try each directory until one loads successfully | |
| for idx, mtx_dir in enumerate(mtx_dirs): | |
| try: | |
| print(f" Attempting to load from: {mtx_dir.relative_to(temp_dir) if mtx_dir != Path(temp_dir) else 'root'}") | |
| # List all files in this directory for debugging | |
| all_files = [f for f in mtx_dir.iterdir() if f.is_file()] | |
| print(f" Files in directory: {[f.name for f in all_files[:10]]}") # Show first 10 | |
| # Find the actual files (case-insensitive) | |
| mtx_files = [f for f in all_files if 'matrix.mtx' in f.name.lower()] | |
| gene_files = [f for f in all_files if 'genes.tsv' in f.name.lower() or 'features.tsv' in f.name.lower()] | |
| barcode_files = [f for f in all_files if 'barcodes.tsv' in f.name.lower()] | |
| print(f" Matrix files: {[f.name for f in mtx_files]}") | |
| print(f" Gene/feature files: {[f.name for f in gene_files]}") | |
| print(f" Barcode files: {[f.name for f in barcode_files]}") | |
| if not mtx_files or not gene_files or not barcode_files: | |
| print(f" ⚠ Missing required files, skipping...") | |
| continue | |
| # Check if files have standard names or prefixed names | |
| has_standard_names = any(f.name.lower() in ['matrix.mtx', 'matrix.mtx.gz'] for f in mtx_files) | |
| if not has_standard_names and len(mtx_files) > 0: | |
| # Multiple samples with prefixed names - need to handle differently | |
| print(f" ⚠ Non-standard naming detected ({len(mtx_files)} samples with prefixes)") | |
| print(f" This archive contains multiple samples. Loading first sample: {mtx_files[0].stem}") | |
| # Find matching features and barcodes for the first sample | |
| # Extract sample prefix (e.g., 'GSM4307515_N-1-LVP' from 'GSM4307515_N-1-LVP_matrix.mtx.gz') | |
| first_mtx = mtx_files[0] | |
| sample_prefix = first_mtx.name.replace('_matrix.mtx.gz', '').replace('_matrix.mtx', '') | |
| matching_features = [f for f in gene_files if sample_prefix in f.name] | |
| matching_barcodes = [f for f in barcode_files if sample_prefix in f.name] | |
| if not matching_features or not matching_barcodes: | |
| print(f" ⚠ Could not find matching features/barcodes for {sample_prefix}") | |
| continue | |
| # Decompress and create symlinks with standard names | |
| import gzip | |
| standard_matrix = mtx_dir / 'matrix.mtx' | |
| standard_features = mtx_dir / 'features.tsv' | |
| standard_barcodes = mtx_dir / 'barcodes.tsv' | |
| # Decompress and copy to standard names | |
| for src, dst in [(first_mtx, standard_matrix), | |
| (matching_features[0], standard_features), | |
| (matching_barcodes[0], standard_barcodes)]: | |
| if src.name.endswith('.gz'): | |
| print(f" Extracting {src.name} → {dst.name}") | |
| with gzip.open(src, 'rb') as f_in: | |
| with open(dst, 'wb') as f_out: | |
| f_out.write(f_in.read()) | |
| else: | |
| import shutil | |
| shutil.copy(src, dst) | |
| print(f" ✓ Created standard 10x structure for sample: {sample_prefix}") | |
| else: | |
| # Standard naming - decompress .gz files if present | |
| import gzip | |
| for file_list in [mtx_files, gene_files, barcode_files]: | |
| for f in file_list: | |
| if f.name.endswith('.gz'): | |
| decompressed_path = f.parent / f.name[:-3] # Remove .gz | |
| if not decompressed_path.exists(): | |
| print(f" Decompressing {f.name}...") | |
| with gzip.open(f, 'rb') as f_in: | |
| with open(decompressed_path, 'wb') as f_out: | |
| f_out.write(f_in.read()) | |
| print(f" ✓ Decompressed to {decompressed_path.name}") | |
| # Try to read the MTX directory | |
| adata = sc.read_10x_mtx(str(mtx_dir)) | |
| print(f" ✓ Successfully loaded MTX format from directory {idx+1}/{len(mtx_dirs)}") | |
| break # Success, exit loop | |
| except Exception as mtx_err: | |
| print(f" ⚠ Failed to load from directory {idx+1}: {str(mtx_err)[:200]}") | |
| if idx < len(mtx_dirs) - 1: | |
| print(f" Trying next directory...") | |
| continue | |
| if adata is None: | |
| print(f" ⚠ Could not load any of the {len(mtx_dirs)} matrix directories") | |
| else: | |
| adata = None | |
| # If h5 files failed or no standard format found, try text files | |
| if adata is None: | |
| print("Searching for readable text files...") | |
| adata = None | |
| for ext_file in extracted_files: | |
| if ext_file.is_file(): | |
| try: | |
| if ext_file.suffix in ['.txt', '.csv', '.tsv', '.txt.gz', '.csv.gz', '.tsv.gz']: | |
| print(f" Attempting to read as expression matrix: {ext_file.name}") | |
| adata = sc.read_csv(str(ext_file), delimiter='\t' if 'tsv' in ext_file.name else ',') | |
| break | |
| except Exception as read_err: | |
| print(f"⚠ Failed to read {ext_file.name}: {str(read_err)}") | |
| continue | |
| if adata is None: | |
| shutil.rmtree(temp_dir) | |
| # Provide detailed error message about what was found | |
| error_details = f" **TAR Archive Extraction Failed**\n\n" | |
| error_details += f"Extracted {len([f for f in extracted_files if f.is_file()])} files but couldn't find compatible single-cell data.\n\n" | |
| error_details += f"**File types detected:** {file_extensions}\n\n" | |
| error_details += f"**What we looked for:**\n" | |
| error_details += f"- .h5ad files: Found {len(h5ad_files)}\n" | |
| error_details += f"- .h5 files: Found {len(h5_files)}\n" | |
| error_details += f"- matrix.mtx directories (with genes + barcodes): Found {len(valid_mtx_dirs)}\n\n" | |
| error_details += f"**Common issues:**\n" | |
| error_details += f"- Files are in non-standard nested directories\n" | |
| error_details += f"- Missing required companion files (genes.tsv, barcodes.tsv)\n" | |
| error_details += f"- Files use different compression or naming\n\n" | |
| error_details += f"**Recommendation:** Extract the archive locally, locate the filtered_feature_bc_matrix folder or .h5ad file, and upload that specific file/folder." | |
| return None, error_details | |
| if adata is not None: | |
| print(f"✓ Successfully loaded from TAR archive: {adata.n_obs:,} cells × {adata.n_vars:,} genes") | |
| else: | |
| shutil.rmtree(temp_dir) | |
| # Provide detailed error message | |
| error_details = f" **TAR Archive Loading Failed**\n\n" | |
| error_details += f"Found potential data files but couldn't load them.\n\n" | |
| error_details += f"**File types detected:** {file_extensions}\n\n" | |
| error_details += f"**Files found:**\n" | |
| error_details += f"- .h5ad: {len(h5ad_files)}\n" | |
| error_details += f"- .h5: {len(h5_files)}\n" | |
| error_details += f"- Valid matrix.mtx directories: {len(valid_mtx_dirs)}\n\n" | |
| error_details += f"Try extracting and uploading the data file directly instead of as TAR." | |
| return None, error_details | |
| # Cleanup temp directory | |
| shutil.rmtree(temp_dir) | |
| except Exception as e: | |
| if Path(temp_dir).exists(): | |
| shutil.rmtree(temp_dir) | |
| raise e | |
| # Try reading based on file type | |
| elif is_h5ad: | |
| print(" Loading AnnData (.h5ad)...") | |
| # Try with retry logic for file locking issues | |
| import time | |
| max_retries = 3 | |
| retry_delay = 2 # seconds | |
| for attempt in range(max_retries): | |
| try: | |
| # Use 'r' mode explicitly and ensure file is properly closed | |
| adata = sc.read_h5ad(uploaded_file, backed=None) | |
| print(f"✓ {adata.n_obs:,} cells × {adata.n_vars:,} genes") | |
| break | |
| except PermissionError as perm_err: | |
| if attempt < max_retries - 1: | |
| print(f" ⚠ File locked, retrying in {retry_delay}s... (attempt {attempt + 1}/{max_retries})") | |
| time.sleep(retry_delay) | |
| retry_delay *= 2 # Exponential backoff | |
| else: | |
| # If all retries fail, try copying to a new location | |
| print(f" ⚠ File still locked after {max_retries} attempts, trying workaround...") | |
| try: | |
| import shutil | |
| temp_copy = tempfile.NamedTemporaryFile(suffix='.h5ad', delete=False) | |
| temp_copy_path = temp_copy.name | |
| temp_copy.close() | |
| # Copy file to new location | |
| shutil.copy2(uploaded_file, temp_copy_path) | |
| print(f" ✓ Copied to temporary location") | |
| # Try reading from copy | |
| adata = sc.read_h5ad(temp_copy_path, backed=None) | |
| print(f"✓ {adata.n_obs:,} cells × {adata.n_vars:,} genes") | |
| # Clean up temp file | |
| try: | |
| Path(temp_copy_path).unlink() | |
| except: | |
| pass | |
| break | |
| except Exception as copy_err: | |
| return None, (f" **File Access Error**\n\n" | |
| f"Cannot access the uploaded file. This usually happens when:\n" | |
| f"- The file is open in another program\n" | |
| f"- Antivirus is scanning the file\n" | |
| f"- Insufficient permissions\n\n" | |
| f"**Solution:** Close the file in other programs and try again.\n\n" | |
| f"Technical details: {str(perm_err)}") | |
| elif is_h5mu: | |
| print(" Loading MuData (.h5mu)...") | |
| try: | |
| import mudata | |
| mdata = mudata.read_h5mu(uploaded_file) | |
| adata = mdata.mod[list(mdata.mod.keys())[0]] | |
| print(f"✓ {adata.n_obs:,} cells × {adata.n_vars:,} genes") | |
| except ImportError: | |
| return None, " MuData support requires: pip install mudata" | |
| elif is_zarr: | |
| print(" Loading Zarr array...") | |
| adata = sc.read_zarr(uploaded_file) | |
| print(f"✓ {adata.n_obs:,} cells × {adata.n_vars:,} genes") | |
| elif is_h5 and not is_h5seurat: | |
| print(" Loading 10X Genomics (.h5)...") | |
| adata = sc.read_10x_h5(uploaded_file) | |
| print(f"✓ {adata.n_obs:,} cells × {adata.n_vars:,} genes") | |
| elif is_mtx: | |
| print(" Loading Matrix Market (.mtx)...") | |
| mtx_dir = filepath.parent | |
| if (mtx_dir / 'genes.tsv').exists(): | |
| adata = sc.read_10x_mtx(str(mtx_dir)) | |
| else: | |
| adata = sc.read_mtx(uploaded_file).T | |
| print(f"✓ {adata.n_obs:,} cells × {adata.n_vars:,} genes") | |
| elif is_loom: | |
| print(" Loading Loom (.loom)...") | |
| adata = sc.read_loom(uploaded_file) | |
| print(f"✓ {adata.n_obs:,} cells × {adata.n_vars:,} genes") | |
| elif is_text: | |
| print(f" Loading text matrix ({filepath.suffix})...") | |
| delim = '\t' if 'tsv' in file_lower else ',' | |
| adata = sc.read_csv(uploaded_file, delimiter=delim, first_column_names=True) | |
| print(f"✓ {adata.n_obs:,} cells × {adata.n_vars:,} genes") | |
| elif is_parquet or is_arrow: | |
| print(f" Loading {'Parquet' if is_parquet else 'Arrow'}...") | |
| try: | |
| import pandas as pd | |
| df = pd.read_parquet(uploaded_file) if is_parquet else pd.read_feather(uploaded_file) | |
| adata = ad.AnnData(df) | |
| print(f"✓ {adata.n_obs:,} cells × {adata.n_vars:,} genes") | |
| except ImportError: | |
| return None, " Parquet/Arrow requires: pip install pyarrow" | |
| elif is_hdf5: | |
| print(" Loading generic HDF5...") | |
| with h5py.File(uploaded_file, 'r') as f: | |
| X = f.get('matrix', f.get('X', f.get('data', None))) | |
| if X is None: | |
| return None, " HDF5 file missing required keys (matrix/X/data)" | |
| adata = ad.AnnData(X[()]) | |
| print(f"✓ {adata.n_obs:,} cells × {adata.n_vars:,} genes") | |
| elif is_h5seurat or is_rds: | |
| filename = filepath.name | |
| return None, (f" **Seurat/R Format Detected: `{filename}`**\n\n" | |
| f"This file format requires conversion to AnnData (.h5ad) before analysis.\n\n" | |
| f"**Option 1: Using SeuratDisk (Recommended)**\n" | |
| f"```r\n" | |
| f"# In R/RStudio:\n" | |
| f"library(Seurat)\n" | |
| f"library(SeuratDisk)\n\n" | |
| f"# Load your Seurat object\n" | |
| f"seurat_obj <- readRDS('{filename}')\n\n" | |
| f"# Convert to h5ad format\n" | |
| f"SaveH5Seurat(seurat_obj, filename = 'output.h5Seurat')\n" | |
| f"Convert('output.h5Seurat', dest = 'h5ad')\n" | |
| f"```\n\n" | |
| f"**Option 2: Using sceasy**\n" | |
| f"```r\n" | |
| f"library(sceasy)\n" | |
| f"sceasy::convertFormat(seurat_obj, from='seurat', to='anndata',\n" | |
| f" outFile='output.h5ad')\n" | |
| f"```\n\n" | |
| f"Then upload the generated `output.h5ad` file to HeartMAP.\n\n" | |
| f"**Need help?** See [FORMAT_SUPPORT_GUIDE.md](https://github.com/Tumo505/HeartMap/blob/master/FORMAT_SUPPORT_GUIDE.md) for detailed instructions.") | |
| else: | |
| return None, (f" Unsupported format: {filepath.suffix}\n\n" | |
| f"**Supported formats:**\n" | |
| f"• AnnData: .h5ad, .h5mu (MuData), .zarr\n" | |
| f"• 10X: .h5, .mtx (Matrix Market)\n" | |
| f"• Loom: .loom\n" | |
| f"• Text: .csv, .tsv, .txt\n" | |
| f"• Archives: .tar, .tar.gz, .tgz\n" | |
| f"• Columnar: .parquet, .arrow\n" | |
| f"• HDF5: .hdf5\n\n" | |
| f"For FASTQ/BAM/CRAM, use Cell Ranger or similar tools first.") | |
| except Exception as e: | |
| # Handle version compatibility issues | |
| error_str = str(e) | |
| is_compat_error = ( | |
| "IOSpec" in error_str or | |
| "encoding_type" in error_str or | |
| "No read method registered" in error_str or | |
| "unexpected keyword argument" in error_str or | |
| "'matrix'" in error_str or | |
| "AnnData.__init__" in error_str | |
| ) | |
| if is_compat_error: | |
| print(f"AnnData version compatibility issue detected: {error_str[:100]}") | |
| print(f" Attempting fallback read method...") | |
| try: | |
| # Fallback 1: Read with backed mode then load into memory | |
| adata = sc.read_h5ad(uploaded_file, backed='r') | |
| adata = adata.to_memory() | |
| print("✓ Successfully loaded using backed mode") | |
| except Exception as e2: | |
| try: | |
| # Fallback 2: Read manually from h5py with proper structure handling | |
| print(" Attempting manual h5py read...") | |
| with h5py.File(uploaded_file, 'r') as f: | |
| # Read X matrix (handle different storage formats) | |
| if 'X' in f: | |
| X_group = f['X'] | |
| if isinstance(X_group, h5py.Dataset): | |
| X = X_group[:] | |
| else: | |
| # Sparse matrix format | |
| try: | |
| from scipy import sparse | |
| data = X_group['data'][:] | |
| indices = X_group['indices'][:] | |
| indptr = X_group['indptr'][:] | |
| shape = X_group['shape'][:] | |
| X = sparse.csr_matrix((data, indices, indptr), shape=shape) | |
| except: | |
| X = X_group['data'][:] # Fallback to data only | |
| else: | |
| raise ValueError("No X matrix found in file") | |
| # Read obs (cell metadata) | |
| obs_dict = {} | |
| if 'obs' in f: | |
| obs_group = f['obs'] | |
| for key in obs_group.keys(): | |
| try: | |
| data = obs_group[key][:] | |
| # Decode bytes if necessary | |
| if data.dtype.kind == 'S' or data.dtype.kind == 'O': | |
| data = [x.decode('utf-8') if isinstance(x, bytes) else str(x) for x in data] | |
| obs_dict[key] = data | |
| except Exception as e_key: | |
| print(f"Skipping obs key '{key}': {e_key}") | |
| obs = pd.DataFrame(obs_dict) if obs_dict else pd.DataFrame(index=range(X.shape[0])) | |
| # Read var (gene metadata) | |
| var_dict = {} | |
| if 'var' in f: | |
| var_group = f['var'] | |
| for key in var_group.keys(): | |
| try: | |
| data = var_group[key][:] | |
| # Decode bytes if necessary | |
| if data.dtype.kind == 'S' or data.dtype.kind == 'O': | |
| data = [x.decode('utf-8') if isinstance(x, bytes) else str(x) for x in data] | |
| var_dict[key] = data | |
| except Exception as e_key: | |
| print(f"Skipping var key '{key}': {e_key}") | |
| var = pd.DataFrame(var_dict) if var_dict else pd.DataFrame(index=range(X.shape[1])) | |
| # Create basic AnnData object (skip problematic uns) | |
| adata = ad.AnnData(X=X, obs=obs, var=var) | |
| print("✓ Successfully loaded using manual h5py read") | |
| except Exception as e3: | |
| raise ValueError( | |
| f"Unable to load file with any method.\n\n" | |
| f"**Primary error:** {str(e)}\n\n" | |
| f"**Suggestions:**\n" | |
| f"1. The file may have been created with a newer AnnData version\n" | |
| f"2. Try re-saving the file with: `adata.write('file.h5ad', compression='gzip')`\n" | |
| f"3. Or use an older AnnData format: `adata.write_h5ad('file.h5ad', as_dense='X')`\n\n" | |
| f"**Technical details:** {str(e3)}" | |
| ) | |
| else: | |
| # Not a compatibility error - return the original error | |
| import traceback | |
| return None, (f" **Error loading data:**\n\n{str(e)}\n\n" | |
| f"**Traceback:**\n```\n{traceback.format_exc()}\n```") | |
| try: | |
| # Check for chamber information | |
| chamber_info = "" | |
| if 'chamber' in adata.obs.columns: | |
| chambers = adata.obs['chamber'].unique() | |
| chamber_info = f"\nChamber information detected: {', '.join(chambers)}" | |
| else: | |
| # Try to infer chamber from various metadata fields | |
| chamber_assigned = False | |
| # Debug: Show available metadata columns | |
| print(f"Available metadata columns: {list(adata.obs.columns)}") | |
| # Check for common chamber-related column names | |
| chamber_keywords = ['tissue', 'location', 'sample', 'orig.ident', 'biosample', 'cell', 'batch', 'donor', 'patient'] | |
| for col in adata.obs.columns: | |
| if any(keyword in col.lower() for keyword in chamber_keywords): | |
| print(f"Attempting to infer chamber from column: {col}") | |
| # Show sample values | |
| sample_values = adata.obs[col].unique()[:5] | |
| print(f" Sample values: {sample_values}") | |
| values = adata.obs[col].astype(str).str.upper() | |
| # Map common chamber identifiers | |
def map_chamber(val):
    """Map one metadata value to a heart-chamber code.

    The value is upper-cased and checked, in priority order, for substrings
    identifying the right atrium, right ventricle, left atrium and left
    ventricle; a bare 'ATRI' falls back to 'RA' and a bare 'VENT' to 'LV'.

    Args:
        val: A single cell of a metadata column. Any type is accepted; it is
            converted with ``str()`` before matching so numeric or missing
            entries do not raise.

    Returns:
        'RA', 'RV', 'LA' or 'LV' for the first rule that matches, otherwise
        None.
    """
    if val is None:
        return None
    # Coerce first: metadata columns such as batch/donor ids are often
    # numeric or contain NaN, and calling .upper() on those raised
    # AttributeError, aborting validation of the whole dataset.
    text = str(val).upper()
    # Order matters: the first matching rule wins, exactly as in the
    # original if/elif chain.
    # NOTE(review): matching is by plain substring, so short codes also hit
    # inside longer words (e.g. 'LA' in 'FIBROBLAST') - confirm whether
    # stricter matching is wanted.
    rules = (
        ('RA', ('RA', 'RIGHT ATRI', 'R_ATRI')),
        ('RV', ('RV', 'RIGHT VENT', 'R_VENT')),
        ('LA', ('LA', 'LEFT ATRI', 'L_ATRI')),
        ('LV', ('LV', 'LEFT VENT', 'L_VENT')),
        ('RA', ('ATRI',)),  # Default atrium
        ('LV', ('VENT',)),  # Default ventricle
    )
    for chamber, needles in rules:
        if any(needle in text for needle in needles):
            return chamber
    return None
| adata.obs['chamber'] = adata.obs[col].apply(map_chamber) | |
| # Check if we successfully assigned chambers | |
| if adata.obs['chamber'].notna().sum() > 0: | |
| chambers = adata.obs['chamber'].dropna().unique() | |
| if len(chambers) > 1: | |
| chamber_info = f"\nChamber information inferred from '{col}': {', '.join(chambers)}" | |
| chamber_assigned = True | |
| break | |
| else: | |
| # Fill NaN with the single detected chamber | |
| adata.obs['chamber'] = adata.obs['chamber'].fillna(chambers[0]) | |
| chamber_info = f"\nSingle chamber detected from '{col}': {chambers[0]}" | |
| chamber_assigned = True | |
| break | |
| if not chamber_assigned: | |
| chamber_info = "\n⚠ No chamber information found" | |
| chamber_info += "\n Single-chamber analysis will be performed" | |
| chamber_info += "\n For multi-chamber analysis, data should have 'chamber' column with values: RA, RV, LA, LV" | |
| # Don't assign a default chamber - let the analysis handle missing chamber info | |
| adata.obs['chamber'] = 'Unknown' | |
| validation_msg = f""" | |
| Data loaded successfully! | |
| - Cells: {adata.n_obs:,} | |
| - Genes: {adata.n_vars:,} | |
| {chamber_info} | |
| """ | |
| return adata, validation_msg | |
| except Exception as e: | |
| raise ValueError(f"Error validating loaded data: {str(e)}") | |
| def create_communication_network(adata, hub_stats, chamber_stats=None): | |
| """ | |
| Create interactive Plotly network graph showing inferred cell-cell communication | |
| based on co-expression of ligand-receptor genes | |
| Args: | |
| adata: AnnData object with clustering and gene expression | |
| hub_stats: DataFrame with hub scores per cell type | |
| chamber_stats: Optional DataFrame with chamber information | |
| Returns: | |
| Path to HTML file with interactive network | |
| """ | |
| import networkx as nx | |
| try: | |
| # Detect cluster column (try multiple common names) | |
| cluster_col = None | |
| for col in ['leiden', 'louvain', 'Cluster', 'cluster', 'cell_type', 'celltype']: | |
| if col in adata.obs.columns: | |
| cluster_col = col | |
| break | |
| if cluster_col is None: | |
| print(" No clustering column found, skipping network graph") | |
| return None | |
| print(f" Using clustering column: '{cluster_col}'") | |
| cell_types = adata.obs[cluster_col].unique() | |
| n_types = len(cell_types) | |
| # Load ligand-receptor pairs from database | |
| print(" Loading ligand-receptor database...") | |
| if LR_DATABASE_AVAILABLE: | |
| try: | |
| ligand_receptor_pairs = get_ligand_receptor_pairs( | |
| adata, | |
| resource='consensus', | |
| confidence_threshold=0.7 | |
| ) | |
| print(f" Loaded {len(ligand_receptor_pairs)} ligand-receptor pairs from database") | |
| except Exception as e: | |
| print(f" Warning: Could not load L-R database: {e}") | |
| print(" Using minimal fallback pairs") | |
| ligand_receptor_pairs = [ | |
| ('VEGFA', 'FLT1'), ('VEGFA', 'KDR'), | |
| ('TGFB1', 'TGFBR1'), ('TGFB1', 'TGFBR2'), | |
| ('FGF2', 'FGFR1'), ('IL6', 'IL6R'), | |
| ('TNF', 'TNFRSF1A'), ('CXCL12', 'CXCR4') | |
| ] | |
| else: | |
| print(" L-R database not available, using minimal fallback pairs") | |
| ligand_receptor_pairs = [ | |
| ('VEGFA', 'FLT1'), ('VEGFA', 'KDR'), | |
| ('TGFB1', 'TGFBR1'), ('TGFB1', 'TGFBR2'), | |
| ('FGF2', 'FGFR1'), ('IL6', 'IL6R'), | |
| ('TNF', 'TNFRSF1A'), ('CXCL12', 'CXCR4') | |
| ] | |
| # Calculate mean expression per cell type | |
| print(" Calculating cell type expression profiles...") | |
| cell_type_expression = {} | |
| for cell_type in cell_types: | |
| cell_mask = adata.obs[cluster_col] == cell_type | |
| # Convert pandas Series to numpy array for scipy sparse matrix indexing | |
| cell_mask_array = cell_mask.values if hasattr(cell_mask, 'values') else np.asarray(cell_mask) | |
| if hasattr(adata.X, 'toarray'): | |
| subset_expr = adata.X[cell_mask_array].toarray() | |
| else: | |
| subset_expr = adata.X[cell_mask_array] | |
| # Calculate mean and ensure it's a numpy array | |
| mean_expr = np.mean(subset_expr, axis=0) | |
| if hasattr(mean_expr, 'A1'): | |
| mean_expr = mean_expr.A1 | |
| elif hasattr(mean_expr, 'values'): | |
| mean_expr = mean_expr.values | |
| # Ensure it's flattened | |
| mean_expr = np.asarray(mean_expr).flatten() | |
| cell_type_expression[str(cell_type)] = mean_expr | |
| # Create network graph | |
| G = nx.DiGraph() # Directed graph for ligand->receptor | |
| # Add nodes for each cell type with metadata | |
| node_info = [] | |
| for i, cell_type in enumerate(cell_types): | |
| cell_mask = adata.obs[cluster_col] == cell_type | |
| n_cells = int(cell_mask.sum()) | |
| # Get hub score if available | |
| hub_score = 0 | |
| if hub_stats is not None and len(hub_stats) > 0: | |
| type_label = f"Cluster {cell_type}" | |
| matching = hub_stats[hub_stats['Cell Type'] == type_label] | |
| if len(matching) > 0: | |
| hub_score = matching.iloc[0]['Hub Score'] | |
| # Get chamber distribution | |
| chamber_dist = "" | |
| if chamber_stats is not None and 'chamber' in adata.obs.columns: | |
| type_chambers = adata.obs[cell_mask]['chamber'].value_counts() | |
| chamber_dist = ", ".join([f"{ch}: {cnt}" for ch, cnt in type_chambers.items()]) | |
| node_info.append({ | |
| 'id': str(cell_type), | |
| 'label': f"Cluster {cell_type}", | |
| 'size': int(n_cells), | |
| 'hub_score': float(hub_score), | |
| 'chamber_dist': chamber_dist, | |
| 'color_idx': i | |
| }) | |
| G.add_node(str(cell_type), | |
| size=int(n_cells), | |
| hub_score=float(hub_score), | |
| label=f"Cluster {cell_type}", | |
| color_idx=i) | |
| # Infer communication edges from ligand-receptor co-expression | |
| print(" Inferring cell-cell communication from gene expression...") | |
| edge_info = [] | |
| for ligand, receptor in ligand_receptor_pairs: | |
| # Check if genes exist in dataset | |
| if ligand not in adata.var_names or receptor not in adata.var_names: | |
| continue | |
| ligand_idx = list(adata.var_names).index(ligand) | |
| receptor_idx = list(adata.var_names).index(receptor) | |
| # Find cell types that express ligand and receptor | |
| for source_type in cell_types: | |
| ligand_expr = float(cell_type_expression[str(source_type)][ligand_idx]) | |
| if ligand_expr > 0.1: # Threshold for meaningful expression | |
| for target_type in cell_types: | |
| if source_type == target_type: | |
| continue | |
| receptor_expr = float(cell_type_expression[str(target_type)][receptor_idx]) | |
| if receptor_expr > 0.1: # Both genes expressed | |
| # Calculate interaction strength | |
| strength = float(np.sqrt(ligand_expr * receptor_expr)) | |
| edge_key = (str(source_type), str(target_type)) | |
| edge_info.append({ | |
| 'source': str(source_type), | |
| 'target': str(target_type), | |
| 'ligand': ligand, | |
| 'receptor': receptor, | |
| 'strength': strength | |
| }) | |
| # Add or update edge | |
| if G.has_edge(str(source_type), str(target_type)): | |
| G[str(source_type)][str(target_type)]['weight'] += strength | |
| G[str(source_type)][str(target_type)]['interactions'].append(f"{ligand}-{receptor}") | |
| else: | |
| G.add_edge(str(source_type), str(target_type), | |
| weight=strength, | |
| interactions=[f"{ligand}-{receptor}"]) | |
| print(f" Found {G.number_of_edges()} communication interactions between {G.number_of_nodes()} cell types") | |
| # Create Plotly figure with layout | |
| pos = nx.spring_layout(G, k=2.5, iterations=50, seed=42, weight='weight') | |
| # Create edge traces (one per edge for hover info) | |
| edge_traces = [] | |
| for edge in G.edges(data=True): | |
| x0, y0 = pos[edge[0]] | |
| x1, y1 = pos[edge[1]] | |
| weight = edge[2].get('weight', 0) | |
| interactions = edge[2].get('interactions', []) | |
| # Create arrow shape for directed edge | |
| edge_trace = go.Scatter( | |
| x=[x0, x1, None], | |
| y=[y0, y1, None], | |
| mode='lines', | |
| line=dict( | |
| width=max(0.5, min(5, weight * 2)), # Scale by strength | |
| color='rgba(150, 150, 150, 0.5)' | |
| ), | |
| hoverinfo='text', | |
| hovertext=f"<b>{edge[0]} → {edge[1]}</b><br>" + | |
| f"Interaction strength: {weight:.3f}<br>" + | |
| f"Ligand-Receptor pairs: {len(interactions)}<br>" + | |
| "<br>".join(interactions[:5]) + # Show first 5 | |
| (f"<br>... and {len(interactions)-5} more" if len(interactions) > 5 else ""), | |
| showlegend=False | |
| ) | |
| edge_traces.append(edge_trace) | |
| # Generate distinct colors for cell types | |
| import plotly.colors as pcolors | |
| if n_types <= 10: | |
| colors = pcolors.qualitative.Set3[:n_types] | |
| else: | |
| colors = pcolors.sample_colorscale("turbo", [i/n_types for i in range(n_types)]) | |
| # Create separate trace for each cell type (to show in legend) | |
| node_traces = [] | |
| for node_data in node_info: | |
| node_id = node_data['id'] | |
| if node_id not in pos: | |
| continue | |
| x, y = pos[node_id] | |
| label = node_data['label'] | |
| size = node_data['size'] | |
| hub_score = node_data['hub_score'] | |
| chamber_dist = node_data['chamber_dist'] | |
| color_idx = node_data['color_idx'] | |
| # Count outgoing and incoming communications | |
| out_edges = G.out_degree(node_id) | |
| in_edges = G.in_degree(node_id) | |
| hover_text = ( | |
| f"<b>{label}</b><br>" | |
| f"Cells: {size:,}<br>" | |
| f"Hub Score: {hub_score:.4f}<br>" | |
| f"Sends signals to: {out_edges} cell types<br>" | |
| f"Receives from: {in_edges} cell types<br>" | |
| f"{chamber_dist}" | |
| ) | |
| node_trace = go.Scatter( | |
| x=[x], | |
| y=[y], | |
| mode='markers+text', | |
| marker=dict( | |
| size=max(15, min(60, size / 20)), # Scale by cell count | |
| color=colors[color_idx % len(colors)], | |
| line=dict(width=2, color='white'), | |
| symbol='circle' | |
| ), | |
| text=label.replace('Cluster ', 'C'), | |
| textposition="top center", | |
| textfont=dict(size=10, color='black'), | |
| hoverinfo='text', | |
| hovertext=hover_text, | |
| name=label, | |
| legendgroup=label, | |
| showlegend=True | |
| ) | |
| node_traces.append(node_trace) | |
| # Store edge information for click interactions (JSON format for JavaScript) | |
| import json | |
| edge_data_for_js = [] | |
| for edge in G.edges(data=True): | |
| interactions = edge[2].get('interactions', []) | |
| edge_data_for_js.append({ | |
| 'source': edge[0], | |
| 'target': edge[1], | |
| 'weight': float(edge[2].get('weight', 0)), | |
| 'interactions': interactions, | |
| 'source_pos': list(pos[edge[0]]), | |
| 'target_pos': list(pos[edge[1]]) | |
| }) | |
| # Store node information for highlighting | |
| node_data_for_js = [] | |
| for node_data in node_info: | |
| if node_data['id'] in pos: | |
| node_data_for_js.append({ | |
| 'id': node_data['id'], | |
| 'label': node_data['label'], | |
| 'pos': list(pos[node_data['id']]), | |
| 'connected_to': [str(t) for t in G.successors(node_data['id'])], | |
| 'connected_from': [str(s) for s in G.predecessors(node_data['id'])] | |
| }) | |
| # Create figure with all traces | |
| fig = go.Figure(data=edge_traces + node_traces) | |
| fig.update_layout( | |
| title={ | |
| 'text': "Interactive Cell-Cell Communication Network<br><sub>Click nodes to highlight connections | Click edges to see L-R pairs</sub>", | |
| 'x': 0.5, | |
| 'xanchor': 'center', | |
| 'font': {'size': 20} | |
| }, | |
| showlegend=True, | |
| legend=dict( | |
| title="Cell Types (click to toggle)", | |
| orientation="v", | |
| yanchor="top", | |
| y=1, | |
| xanchor="left", | |
| x=1.05, | |
| bgcolor="rgba(255,255,255,0.8)", | |
| bordercolor="gray", | |
| borderwidth=1 | |
| ), | |
| hovermode='closest', | |
| width=1400, | |
| height=900, | |
| plot_bgcolor='#f8f9fa', | |
| xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), | |
| yaxis=dict(showgrid=False, zeroline=False, showticklabels=False), | |
| annotations=[ | |
| dict( | |
| text="🖱️ Click cluster to see detailed view with all incoming/outgoing communications | 🖱️ Click edge to see L-R pairs", | |
| showarrow=False, | |
| xref="paper", yref="paper", | |
| x=0.5, y=-0.02, | |
| xanchor='center', | |
| font=dict(size=11, color='gray') | |
| ) | |
| ], | |
| margin=dict(l=20, r=250, t=80, b=40), | |
| clickmode='event+select' | |
| ) | |
| # Save as HTML with custom JavaScript for interactivity | |
| html_path = tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False).name | |
| # Generate base HTML | |
| fig.write_html( | |
| html_path, | |
| include_plotlyjs='cdn', | |
| config={ | |
| 'displayModeBar': True, | |
| 'displaylogo': False, | |
| 'toImageButtonOptions': { | |
| 'format': 'png', | |
| 'filename': 'communication_network', | |
| 'height': 800, | |
| 'width': 1200, | |
| 'scale': 2 | |
| } | |
| } | |
| ) | |
| # Add custom JavaScript for node/edge interactivity | |
| with open(html_path, 'r', encoding='utf-8') as f: | |
| html_content = f.read() | |
| # Inject JavaScript before </body> | |
| # Use triple quotes and escape only what's needed | |
| edge_data_json = json.dumps(edge_data_for_js) | |
| node_data_json = json.dumps(node_data_for_js) | |
| custom_js = ''' | |
| <script> | |
| // Store edge and node data | |
| const edgeData = ''' + edge_data_json + '''; | |
| const nodeData = ''' + node_data_json + '''; | |
| let selectedNode = null; | |
| let expandedEdges = new Set(); | |
| // Add click event listener to the plot | |
| document.addEventListener('DOMContentLoaded', function() { | |
| const plotDiv = document.querySelector('.plotly-graph-div'); | |
| if (!plotDiv) { | |
| console.error('Plotly graph div not found'); | |
| return; | |
| } | |
| console.log('Setting up click handler. Edges:', edgeData.length, 'Nodes:', nodeData.length); | |
| plotDiv.on('plotly_click', function(data) { | |
| const point = data.points[0]; | |
| console.log('Click detected - curveNumber:', point.curveNumber, 'pointNumber:', point.pointNumber); | |
| // Check if clicked on a node (nodes come after edges) | |
| const isNode = point.curveNumber >= edgeData.length; | |
| if (isNode) { | |
| console.log('Node clicked'); | |
| // Node clicked - highlight connections | |
| handleNodeClick(point); | |
| } else { | |
| console.log('Edge clicked'); | |
| // Edge clicked - show L-R pairs | |
| handleEdgeClick(point); | |
| } | |
| }); | |
| }); | |
| function handleNodeClick(point) { | |
| const clickedLabel = point.data.name; | |
| console.log('Node click handler - label:', clickedLabel, 'point:', point); | |
| // Find the node data | |
| const node = nodeData.find(n => n.label === clickedLabel); | |
| if (!node) { | |
| console.error('Could not find node with label:', clickedLabel); | |
| console.log('Available nodes:', nodeData.map(n => n.label)); | |
| return; | |
| } | |
| console.log('Found node data:', node); | |
| // Show detailed cluster view with communications | |
| showClusterDetailView(node); | |
| // Also highlight connections in main network | |
| selectedNode = node.id; | |
| highlightNodeConnections(node); | |
| } | |
| function showClusterDetailView(node) { | |
| // Create detail window/modal showing cluster information | |
| let detailWindow = document.getElementById('cluster-detail-window'); | |
| if (!detailWindow) { | |
| detailWindow = document.createElement('div'); | |
| detailWindow.id = 'cluster-detail-window'; | |
| detailWindow.style.cssText = | |
| 'position: fixed;' + | |
| 'top: 50px;' + | |
| 'left: 50%;' + | |
| 'transform: translateX(-50%);' + | |
| 'background: white;' + | |
| 'border: 3px solid #667eea;' + | |
| 'border-radius: 12px;' + | |
| 'padding: 0;' + | |
| 'width: 80%;' + | |
| 'max-width: 900px;' + | |
| 'max-height: 85vh;' + | |
| 'overflow-y: auto;' + | |
| 'box-shadow: 0 8px 32px rgba(0,0,0,0.3);' + | |
| 'z-index: 10001;' + | |
| 'font-family: Arial, sans-serif;'; | |
| document.body.appendChild(detailWindow); | |
| } | |
| // Get incoming and outgoing communications for this cluster | |
| const incomingComms = edgeData.filter(e => e.target === node.id); | |
| const outgoingComms = edgeData.filter(e => e.source === node.id); | |
| // Create incoming communications HTML | |
| let incomingHTML = ''; | |
| if (incomingComms.length > 0) { | |
| incomingHTML = '<div style="margin-top: 15px;"><h4 style="color: #28a745; margin: 10px 0;">📥 Receiving Signals From:</h4>'; | |
| incomingComms.forEach(comm => { | |
| incomingHTML += | |
| '<div style="background: #f0f8f0; padding: 12px; margin: 8px 0; border-radius: 6px; border-left: 4px solid #28a745;">' + | |
| '<strong style="color: #28a745;">From ' + comm.source + '</strong>' + | |
| '<div style="margin-top: 5px; font-size: 13px;">Strength: ' + comm.weight.toFixed(3) + '</div>' + | |
| '<div style="margin-top: 5px;"><strong>L-R Pairs (' + comm.interactions.length + '):</strong></div>' + | |
| '<ul style="margin: 5px 0; padding-left: 20px; font-size: 12px;">'; | |
| comm.interactions.forEach(pair => { | |
| incomingHTML += '<li>' + pair + '</li>'; | |
| }); | |
| incomingHTML += '</ul></div>'; | |
| }); | |
| incomingHTML += '</div>'; | |
| } | |
| // Create outgoing communications HTML | |
| let outgoingHTML = ''; | |
| if (outgoingComms.length > 0) { | |
| outgoingHTML = '<div style="margin-top: 15px;"><h4 style="color: #dc3545; margin: 10px 0;">📤 Sending Signals To:</h4>'; | |
| outgoingComms.forEach(comm => { | |
| outgoingHTML += | |
| '<div style="background: #fff0f0; padding: 12px; margin: 8px 0; border-radius: 6px; border-left: 4px solid #dc3545;">' + | |
| '<strong style="color: #dc3545;">To ' + comm.target + '</strong>' + | |
| '<div style="margin-top: 5px; font-size: 13px;">Strength: ' + comm.weight.toFixed(3) + '</div>' + | |
| '<div style="margin-top: 5px;"><strong>L-R Pairs (' + comm.interactions.length + '):</strong></div>' + | |
| '<ul style="margin: 5px 0; padding-left: 20px; font-size: 12px;">'; | |
| comm.interactions.forEach(pair => { | |
| outgoingHTML += '<li>' + pair + '</li>'; | |
| }); | |
| outgoingHTML += '</ul></div>'; | |
| }); | |
| outgoingHTML += '</div>'; | |
| } | |
| // Generate communication network visualization for this cluster | |
| const clusterNetworkHTML = generateClusterNetworkSVG(node, incomingComms, outgoingComms); | |
| detailWindow.innerHTML = | |
| '<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 12px 12px 0 0;">' + | |
| '<h2 style="margin: 0;">🔬 ' + node.label + ' - Detailed View</h2>' + | |
| '<p style="margin: 5px 0 0 0; opacity: 0.9;">Communication patterns and cell information</p>' + | |
| '</div>' + | |
| '<div style="padding: 20px;">' + | |
| '<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin-bottom: 20px;">' + | |
| '<div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #667eea;">' + | |
| '<div style="font-size: 12px; color: #666; text-transform: uppercase; margin-bottom: 5px;">Connected To</div>' + | |
| '<div style="font-size: 24px; font-weight: bold; color: #333;">' + node.connected_to.length + ' clusters</div>' + | |
| '</div>' + | |
| '<div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #764ba2;">' + | |
| '<div style="font-size: 12px; color: #666; text-transform: uppercase; margin-bottom: 5px;">Receives From</div>' + | |
| '<div style="font-size: 24px; font-weight: bold; color: #333;">' + node.connected_from.length + ' clusters</div>' + | |
| '</div>' + | |
| '<div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #28a745;">' + | |
| '<div style="font-size: 12px; color: #666; text-transform: uppercase; margin-bottom: 5px;">Total Communications</div>' + | |
| '<div style="font-size: 24px; font-weight: bold; color: #333;">' + (incomingComms.length + outgoingComms.length) + '</div>' + | |
| '</div>' + | |
| '</div>' + | |
| '<div style="background: #f8f9fa; padding: 20px; border-radius: 8px; margin-bottom: 20px;">' + | |
| '<h3 style="margin: 0 0 15px 0; color: #667eea;">📊 Communication Network</h3>' + | |
| clusterNetworkHTML + | |
| '</div>' + | |
| outgoingHTML + | |
| incomingHTML + | |
| '<div style="margin-top: 20px; text-align: center;">' + | |
| '<button onclick="document.getElementById(\'cluster-detail-window\').style.display=\'none\'; resetEdgeStyles();" style="' + | |
| 'padding: 10px 30px;' + | |
| 'background: #667eea;' + | |
| 'color: white;' + | |
| 'border: none;' + | |
| 'border-radius: 6px;' + | |
| 'cursor: pointer;' + | |
| 'font-size: 16px;' + | |
| 'font-weight: bold;' + | |
| '">Close</button>' + | |
| '</div>' + | |
| '</div>'; | |
| detailWindow.style.display = 'block'; | |
| } | |
| function generateClusterNetworkSVG(centerNode, incoming, outgoing) { | |
| // Create a simple SVG visualization of the cluster's connections | |
| const svgWidth = 800; | |
| const svgHeight = 400; | |
| const centerX = svgWidth / 2; | |
| const centerY = svgHeight / 2; | |
| const radius = 120; | |
| let svg = '<svg width="' + svgWidth + '" height="' + svgHeight + '" style="background: white; border-radius: 8px;">'; | |
| // Draw center node (the selected cluster) | |
| svg += '<circle cx="' + centerX + '" cy="' + centerY + '" r="40" fill="#667eea" stroke="#764ba2" stroke-width="3"/>'; | |
| svg += '<text x="' + centerX + '" y="' + (centerY + 5) + '" text-anchor="middle" fill="white" font-weight="bold" font-size="14">' + centerNode.id + '</text>'; | |
| // Draw incoming connections (on the left) | |
| const incomingAngleStep = incoming.length > 1 ? Math.PI / (incoming.length + 1) : Math.PI / 2; | |
| incoming.forEach((comm, i) => { | |
| const angle = Math.PI - (i + 1) * incomingAngleStep; | |
| const x = centerX + radius * Math.cos(angle); | |
| const y = centerY + radius * Math.sin(angle); | |
| // Draw edge | |
| svg += '<line x1="' + x + '" y1="' + y + '" x2="' + (centerX - 40) + '" y2="' + centerY + '" stroke="#28a745" stroke-width="2" marker-end="url(#arrowgreen)"/>'; | |
| // Draw node | |
| svg += '<circle cx="' + x + '" cy="' + y + '" r="25" fill="#28a745" stroke="white" stroke-width="2"/>'; | |
| svg += '<text x="' + x + '" y="' + (y + 4) + '" text-anchor="middle" fill="white" font-size="11" font-weight="bold">' + comm.source + '</text>'; | |
| }); | |
| // Draw outgoing connections (on the right) | |
| const outgoingAngleStep = outgoing.length > 1 ? Math.PI / (outgoing.length + 1) : Math.PI / 2; | |
| outgoing.forEach((comm, i) => { | |
| const angle = (i + 1) * outgoingAngleStep; | |
| const x = centerX + radius * Math.cos(angle); | |
| const y = centerY + radius * Math.sin(angle); | |
| // Draw edge | |
| svg += '<line x1="' + (centerX + 40) + '" y1="' + centerY + '" x2="' + x + '" y2="' + y + '" stroke="#dc3545" stroke-width="2" marker-end="url(#arrowred)"/>'; | |
| // Draw node | |
| svg += '<circle cx="' + x + '" cy="' + y + '" r="25" fill="#dc3545" stroke="white" stroke-width="2"/>'; | |
| svg += '<text x="' + x + '" y="' + (y + 4) + '" text-anchor="middle" fill="white" font-size="11" font-weight="bold">' + comm.target + '</text>'; | |
| }); | |
| // Add arrow markers | |
| svg += '<defs>' + | |
| '<marker id="arrowgreen" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto" markerUnits="strokeWidth">' + | |
| '<path d="M0,0 L0,6 L9,3 z" fill="#28a745" />' + | |
| '</marker>' + | |
| '<marker id="arrowred" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto" markerUnits="strokeWidth">' + | |
| '<path d="M0,0 L0,6 L9,3 z" fill="#dc3545" />' + | |
| '</marker>' + | |
| '</defs>'; | |
| svg += '</svg>'; | |
| return svg; | |
| } | |
| function highlightNodeConnections(node) { | |
| // Get connected edge indices | |
| const connectedEdgeIndices = []; | |
| edgeData.forEach((edge, idx) => { | |
| if (edge.source === node.id || edge.target === node.id) { | |
| connectedEdgeIndices.push(idx); | |
| } | |
| }); | |
| console.log('Highlighting connections for node:', node.id, 'Connected edges:', connectedEdgeIndices); | |
| // Dim all edges except connected ones | |
| const plotDiv = document.querySelector('.plotly-graph-div'); | |
| try { | |
| // Prepare arrays for batch update | |
| const traceIndices = []; | |
| const lineColors = []; | |
| const lineWidths = []; | |
| edgeData.forEach((edge, idx) => { | |
| const isConnected = connectedEdgeIndices.includes(idx); | |
| traceIndices.push(idx); | |
| lineColors.push(isConnected ? | |
| 'rgba(255, 100, 100, 0.8)' : // Highlight connected | |
| 'rgba(150, 150, 150, 0.2)'); // Dim others | |
| lineWidths.push(isConnected ? | |
| Math.max(3, edge.weight * 4) : // Thicker connected | |
| Math.max(0.5, edge.weight * 1)); // Thinner others | |
| }); | |
| // Batch update all edge traces | |
| Plotly.restyle(plotDiv, { | |
| 'line.color': lineColors, | |
| 'line.width': lineWidths | |
| }, traceIndices); | |
| console.log('Edge highlighting applied successfully'); | |
| } catch (error) { | |
| console.error('Error highlighting edges:', error); | |
| } | |
| // Show info box | |
| showNodeInfo(node); | |
| } | |
| function resetEdgeStyles() { | |
| const plotDiv = document.querySelector('.plotly-graph-div'); | |
| console.log('Resetting edge styles'); | |
| try { | |
| // Prepare arrays for batch reset | |
| const traceIndices = []; | |
| const lineColors = []; | |
| const lineWidths = []; | |
| edgeData.forEach((edge, idx) => { | |
| traceIndices.push(idx); | |
| lineColors.push('rgba(150, 150, 150, 0.5)'); | |
| lineWidths.push(Math.max(0.5, Math.min(5, edge.weight * 2))); | |
| }); | |
| // Batch reset all edge traces | |
| Plotly.restyle(plotDiv, { | |
| 'line.color': lineColors, | |
| 'line.width': lineWidths | |
| }, traceIndices); | |
| console.log('Edge styles reset successfully'); | |
| } catch (error) { | |
| console.error('Error resetting edges:', error); | |
| } | |
| hideInfoBox(); | |
| } | |
| function handleEdgeClick(point) { | |
| const edgeIdx = point.curveNumber; | |
| console.log('Edge click handler - index:', edgeIdx, 'Total edges:', edgeData.length); | |
| const edge = edgeData[edgeIdx]; | |
| if (!edge) { | |
| console.error('No edge data found for index:', edgeIdx); | |
| return; | |
| } | |
| console.log('Edge clicked:', edge); | |
| // Show detailed L-R pair information | |
| showEdgeDetails(edge); | |
| } | |
| function showNodeInfo(node) { | |
| // Create or update info box | |
| let infoBox = document.getElementById('network-info-box'); | |
| if (!infoBox) { | |
| infoBox = document.createElement('div'); | |
| infoBox.id = 'network-info-box'; | |
| infoBox.style.cssText = | |
| 'position: fixed;' + | |
| 'top: 100px;' + | |
| 'right: 20px;' + | |
| 'background: white;' + | |
| 'border: 2px solid #4CAF50;' + | |
| 'border-radius: 8px;' + | |
| 'padding: 15px;' + | |
| 'max-width: 300px;' + | |
| 'box-shadow: 0 4px 8px rgba(0,0,0,0.2);' + | |
| 'z-index: 10000;' + | |
| 'font-family: Arial, sans-serif;'; | |
| document.body.appendChild(infoBox); | |
| } | |
| const connectedTo = node.connected_to.join(', ') || 'None'; | |
| const connectedFrom = node.connected_from.join(', ') || 'None'; | |
| infoBox.innerHTML = | |
| '<h3 style="margin: 0 0 10px 0; color: #4CAF50;">' + | |
| '🔍 ' + node.label + | |
| '</h3>' + | |
| '<p style="margin: 5px 0;"><strong>Signals TO:</strong><br>' + | |
| '<span style="font-size: 12px;">' + connectedTo + '</span></p>' + | |
| '<p style="margin: 5px 0;"><strong>Receives FROM:</strong><br>' + | |
| '<span style="font-size: 12px;">' + connectedFrom + '</span></p>' + | |
| '<button onclick="resetEdgeStyles()" style="' + | |
| 'margin-top: 10px;' + | |
| 'padding: 5px 10px;' + | |
| 'background: #f44336;' + | |
| 'color: white;' + | |
| 'border: none;' + | |
| 'border-radius: 4px;' + | |
| 'cursor: pointer;' + | |
| '">Clear Highlight</button>'; | |
| infoBox.style.display = 'block'; | |
| } | |
| function showEdgeDetails(edge) { | |
| // Create or update detail box | |
| let detailBox = document.getElementById('edge-detail-box'); | |
| if (!detailBox) { | |
| detailBox = document.createElement('div'); | |
| detailBox.id = 'edge-detail-box'; | |
| detailBox.style.cssText = | |
| 'position: fixed;' + | |
| 'top: 100px;' + | |
| 'left: 20px;' + | |
| 'background: white;' + | |
| 'border: 2px solid #2196F3;' + | |
| 'border-radius: 8px;' + | |
| 'padding: 15px;' + | |
| 'max-width: 400px;' + | |
| 'max-height: 500px;' + | |
| 'overflow-y: auto;' + | |
| 'box-shadow: 0 4px 8px rgba(0,0,0,0.2);' + | |
| 'z-index: 10000;' + | |
| 'font-family: Arial, sans-serif;'; | |
| document.body.appendChild(detailBox); | |
| } | |
| const interactionList = edge.interactions.map(pair => | |
| '<li style="margin: 5px 0; font-size: 13px;">📡 ' + pair + '</li>' | |
| ).join(''); | |
| detailBox.innerHTML = | |
| '<h3 style="margin: 0 0 10px 0; color: #2196F3;">' + | |
| edge.source + ' ➡ ' + edge.target + | |
| '</h3>' + | |
| '<p style="margin: 5px 0;"><strong>Interaction Strength:</strong> ' + edge.weight.toFixed(3) + '</p>' + | |
| '<p style="margin: 5px 0;"><strong>Ligand-Receptor Pairs (' + edge.interactions.length + '):</strong></p>' + | |
| '<ul style="margin: 5px 0; padding-left: 20px;">' + | |
| interactionList + | |
| '</ul>' + | |
| '<button onclick="document.getElementById(\'edge-detail-box\').style.display=\'none\'" style="' + | |
| 'margin-top: 10px;' + | |
| 'padding: 5px 10px;' + | |
| 'background: #f44336;' + | |
| 'color: white;' + | |
| 'border: none;' + | |
| 'border-radius: 4px;' + | |
| 'cursor: pointer;' + | |
| '">Close</button>'; | |
| detailBox.style.display = 'block'; | |
| } | |
| function hideInfoBox() { | |
| const infoBox = document.getElementById('network-info-box'); | |
| if (infoBox) { | |
| infoBox.style.display = 'none'; | |
| } | |
| } | |
| </script> | |
| ''' | |
| # Insert JavaScript before </body> | |
| html_content = html_content.replace('</body>', custom_js + '</body>') | |
| # Write back | |
| with open(html_path, 'w', encoding='utf-8') as f: | |
| f.write(html_content) | |
| print(f"✓ Created interactive communication network: {html_path}") | |
| return html_path | |
| except Exception as e: | |
| import traceback | |
| print(f"Warning: Could not create network graph: {e}") | |
| print(f"Traceback: {traceback.format_exc()}") | |
| return None | |
def create_cluster_detail_view(adata, cluster_id, cluster_col='leiden'):
    """
    Build a standalone HTML report describing a single cluster.

    The report shows:
    1. Summary statistics: number of cells, number of genes, and the mean of
       the ``total_counts`` / ``n_genes_by_counts`` obs columns (0 when a
       column is absent)
    2. Per-chamber cell distribution, when ``obs`` has a ``chamber`` column
    3. The 20 genes with the highest mean expression within the cluster

    Args:
        adata: AnnData object
        cluster_id: The cluster ID to analyze; matched with ``==`` against
            the values of ``adata.obs[cluster_col]``
        cluster_col: Column name containing cluster assignments

    Returns:
        Dictionary with keys ``'stats'`` (the computed statistics, holding
        the raw, unescaped labels) and ``'html_path'`` (path of the HTML
        report written to a temporary file), or ``None`` if any step raised
        an exception (the traceback is printed).
    """
    import html  # local import: only needed to escape dataset-derived labels

    try:
        # Get cells in this cluster
        cluster_mask = adata.obs[cluster_col] == cluster_id
        cluster_adata = adata[cluster_mask].copy()
        n_cells = cluster_adata.n_obs

        # Calculate cluster statistics
        stats = {
            'cluster_id': str(cluster_id),
            'n_cells': int(n_cells),
            'n_genes': int(cluster_adata.n_vars),
            'total_counts_mean': float(np.mean(cluster_adata.obs.get('total_counts', [0]))),
            'n_genes_by_counts_mean': float(np.mean(cluster_adata.obs.get('n_genes_by_counts', [0]))),
        }

        # Get chamber distribution if available
        if 'chamber' in cluster_adata.obs.columns:
            chamber_counts = cluster_adata.obs['chamber'].value_counts()
            stats['chamber_distribution'] = chamber_counts.to_dict()

        # Get top expressed genes in this cluster
        expr_matrix = cluster_adata.X
        if hasattr(expr_matrix, 'toarray'):
            # Sparse input: take the column mean directly instead of
            # materialising a dense copy of the whole cluster.
            mean_expr = expr_matrix.mean(axis=0)
        else:
            mean_expr = np.mean(expr_matrix, axis=0)
        # np.asarray also unwraps the (1, n_genes) np.matrix that sparse
        # matrices return; flatten to a plain 1-D vector.
        mean_expr = np.asarray(mean_expr).flatten()

        top_gene_indices = np.argsort(mean_expr)[-20:][::-1]
        top_genes = [(str(cluster_adata.var_names[i]), float(mean_expr[i]))
                     for i in top_gene_indices]
        stats['top_genes'] = top_genes

        # Labels come from the uploaded dataset, so escape them before
        # placing them in markup.
        safe_cluster_id = html.escape(str(cluster_id))

        # Create HTML visualization
        html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>Cluster {safe_cluster_id} Details</title>
<style>
body {{
font-family: Arial, sans-serif;
padding: 20px;
max-width: 1200px;
margin: 0 auto;
background: #f5f5f5;
}}
.header {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 30px;
border-radius: 10px;
margin-bottom: 20px;
}}
.stats-grid {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 20px;
margin-bottom: 20px;
}}
.stat-card {{
background: white;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}}
.stat-card h3 {{
margin: 0 0 10px 0;
color: #667eea;
font-size: 14px;
text-transform: uppercase;
}}
.stat-card .value {{
font-size: 32px;
font-weight: bold;
color: #333;
}}
.section {{
background: white;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
margin-bottom: 20px;
}}
.gene-list {{
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 10px;
}}
.gene-item {{
padding: 10px;
background: #f8f9fa;
border-radius: 4px;
border-left: 3px solid #667eea;
}}
.gene-name {{
font-weight: bold;
color: #333;
}}
.gene-expr {{
color: #666;
font-size: 12px;
}}
.chamber-bar {{
height: 30px;
background: #e0e0e0;
border-radius: 4px;
margin: 5px 0;
position: relative;
overflow: hidden;
}}
.chamber-fill {{
height: 100%;
background: linear-gradient(90deg, #667eea, #764ba2);
display: flex;
align-items: center;
padding-left: 10px;
color: white;
font-weight: bold;
}}
</style>
</head>
<body>
<div class="header">
<h1>🔬 Cluster {safe_cluster_id} Detailed Analysis</h1>
<p>Comprehensive view of cell population and communication patterns</p>
</div>
<div class="stats-grid">
<div class="stat-card">
<h3>Total Cells</h3>
<div class="value">{n_cells:,}</div>
</div>
<div class="stat-card">
<h3>Genes Detected</h3>
<div class="value">{stats['n_genes']:,}</div>
</div>
<div class="stat-card">
<h3>Avg UMI Count</h3>
<div class="value">{stats['total_counts_mean']:.0f}</div>
</div>
<div class="stat-card">
<h3>Avg Genes/Cell</h3>
<div class="value">{stats['n_genes_by_counts_mean']:.0f}</div>
</div>
</div>
"""

        # Add chamber distribution if available
        if 'chamber_distribution' in stats:
            total_cells = sum(stats['chamber_distribution'].values())
            html_content += """
<div class="section">
<h2>📍 Chamber Distribution</h2>
"""
            for chamber, count in sorted(stats['chamber_distribution'].items(), key=lambda x: x[1], reverse=True):
                # A categorical chamber column can report all-zero counts
                # for an empty cluster; avoid dividing by zero in that case.
                percentage = (count / total_cells) * 100 if total_cells else 0.0
                safe_chamber = html.escape(str(chamber))
                html_content += f"""
<div style="margin: 10px 0;">
<div style="display: flex; justify-content: space-between; margin-bottom: 5px;">
<span><strong>{safe_chamber}</strong></span>
<span>{count:,} cells ({percentage:.1f}%)</span>
</div>
<div class="chamber-bar">
<div class="chamber-fill" style="width: {percentage}%;">
{percentage:.1f}%
</div>
</div>
</div>
"""
            html_content += "</div>"

        # Add top expressed genes
        html_content += """
<div class="section">
<h2>🧬 Top 20 Expressed Genes</h2>
<div class="gene-list">
"""
        for gene_name, expr_val in top_genes:
            html_content += f"""
<div class="gene-item">
<div class="gene-name">{html.escape(gene_name)}</div>
<div class="gene-expr">Expression: {expr_val:.2f}</div>
</div>
"""
        html_content += """
</div>
</div>
</body>
</html>
"""

        # Save to temporary file. Write through the handle that created the
        # file so it is closed deterministically; delete=False keeps the
        # report on disk for the caller.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False, encoding='utf-8') as report_file:
            report_file.write(html_content)
            detail_path = report_file.name

        return {
            'stats': stats,
            'html_path': detail_path
        }
    except Exception as e:
        # Best-effort view: report the failure and let the caller continue.
        import traceback
        print(f"Error creating cluster detail view: {e}")
        print(traceback.format_exc())
        return None
def _analysis_failure(message: str) -> Tuple:
    """Build the 9-slot result tuple used whenever no output files are produced."""
    return message, None, None, None, None, None, None, "", None


def _reserve_temp_file(suffix: str) -> str:
    """Create an empty temp file that survives this call and return its path."""
    handle = tempfile.NamedTemporaryFile(mode='w', suffix=suffix, delete=False)
    handle.close()
    return handle.name


def _discard_files(paths) -> None:
    """Best-effort removal of temp files that will not be handed to the UI."""
    for path in paths:
        try:
            Path(path).unlink()
        except OSError:
            # Deliberately ignored: a leftover temp file must never mask the real result/error.
            pass


def _to_dense(matrix):
    """Return ``matrix`` as a dense array, converting objects that expose ``toarray()``."""
    return matrix.toarray() if hasattr(matrix, 'toarray') else matrix


def _mean_obs_metric(adata, candidate_columns) -> str:
    """Format the mean of the first listed per-cell column present in ``adata.obs``, else "N/A"."""
    for column in candidate_columns:
        if column in adata.obs.columns:
            return f"{float(adata.obs[column].mean()):,.1f}"
    return "N/A"


def analyze_heart_data(
    uploaded_file,
    analysis_type,
    max_cells,
    max_genes,
    include_chamber_analysis,
    include_communication_hubs,
    progress=gr.Progress()
) -> Tuple[str, str, str, str, str, str, str, str, str]:
    """Comprehensive HeartMAP analysis with chamber-specific insights.

    Validates the upload, loads it with ``load_and_validate_data``, runs the
    selected pipeline on a temporary ``.h5ad`` copy, then derives summary
    statistics, optional chamber / hub analyses and interactive Plotly HTML files.

    Args:
        uploaded_file: Path of the uploaded dataset (``None`` when nothing was uploaded).
        analysis_type: ``"basic"``, ``"comprehensive"`` or ``"multi_chamber"``; any other
            value, or ``"multi_chamber"`` with chamber analysis disabled, runs ``BasicPipeline``.
        max_cells: Optional cell cap written to ``config.data.max_cells_subset``.
        max_genes: Optional gene cap written to ``config.data.max_genes_subset``.
        include_chamber_analysis: Enable chamber statistics, correlations and markers
            (only performed when ``obs`` has a ``'chamber'`` column).
        include_communication_hubs: Enable hub scoring and the communication network.
        progress: Gradio progress tracker.

    Returns:
        Tuple of (output_msg, csv_file, viz_chamber, viz_hubs, viz_corr, viz_markers,
        viz_network, chamber_info, chamber_info_file). On any failure the first item
        holds the error text, ``chamber_info`` is ``""`` and every file slot is ``None``.
    """
    if not HEARTMAP_AVAILABLE:
        gr.Info(" HeartMAP dependencies not available", duration=5)
        return _analysis_failure(" HeartMAP not available. Please install dependencies.")

    if uploaded_file is None:
        gr.Info("⚠️ Please upload a data file", duration=3)
        return _analysis_failure("⚠️ Please upload a file.")

    # Validate file upload completed successfully
    progress(0, desc="🔍 Validating file...")
    try:
        upload_path = Path(uploaded_file)
        if not upload_path.exists():
            gr.Warning("Upload incomplete - please try again", duration=5)
            return _analysis_failure(" Upload incomplete. Please try uploading the file again.")

        file_size_mb = upload_path.stat().st_size / (1024 * 1024)
        file_size_gb = file_size_mb / 1024
        print(f"Processing file: {file_size_mb:.2f} MB ({file_size_gb:.2f} GB)")

        if file_size_mb > 10240:  # 10GB limit
            gr.Warning(f"File too large: {file_size_gb:.2f} GB (max 10 GB)", duration=10)
            return _analysis_failure(
                f" File size ({file_size_gb:.2f} GB) exceeds maximum limit (10 GB). Please use a smaller dataset or subset your data."
            )
        elif file_size_mb > 1024:  # Warn for files > 1GB
            print(f"Large file detected ({file_size_gb:.2f} GB). Processing may take 10-30 minutes...")
            gr.Info(f" Large file ({file_size_gb:.2f} GB) - Processing may take 10-30 minutes. Please be patient!", duration=15)
        elif file_size_mb > 500:
            print(f"Large file detected ({file_size_mb:.1f} MB). Processing may take several minutes...")
            gr.Info(f" Processing {file_size_mb:.1f} MB file - This may take several minutes", duration=10)
        else:
            gr.Info(f"✓ File validated ({file_size_mb:.1f} MB) - Starting analysis...", duration=5)
    except Exception as e:
        gr.Warning(f"File validation error: {str(e)}", duration=8)
        return _analysis_failure(f" File validation error: {str(e)}. Please re-upload the file.")

    # Reserve output files outside the TemporaryDirectory below so they outlive it.
    reserved = {
        'csv': _reserve_temp_file('.csv'),
        'chamber': _reserve_temp_file('_chamber_distribution.html'),
        'hubs': _reserve_temp_file('_hub_scores.html'),
        'corr': _reserve_temp_file('_correlations.html'),
        'markers': _reserve_temp_file('_markers.html'),
        'network': _reserve_temp_file('_network.html'),
        'chamber_info': _reserve_temp_file('_chamber_details.txt'),
    }

    try:
        # Load and validate data
        progress(0.05, desc=" Loading data file...")
        print("Loading data...")
        gr.Info(" Loading and validating data...", duration=5)
        adata, validation_msg = load_and_validate_data(uploaded_file)

        # Check if loading failed
        if adata is None:
            gr.Warning("Data loading failed - check error details", duration=8)
            _discard_files(reserved.values())
            # All 9 outputs must be returned to match the Gradio interface.
            return _analysis_failure(f" **Data Loading Failed**\n\n{validation_msg}")

        gr.Info(f"✓ Data loaded: {adata.n_obs:,} cells × {adata.n_vars:,} genes", duration=5)

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            results_dir = temp_path / "results"
            results_dir.mkdir(exist_ok=True)

            # Save loaded data to temporary h5ad file for pipeline
            progress(0.15, desc="💾 Preparing data...")
            temp_data_file = temp_path / "loaded_data.h5ad"
            print("Saving loaded data to temporary file...")

            # Ensure any file handles are closed before writing
            import gc
            gc.collect()
            try:
                adata.write_h5ad(temp_data_file)
            except PermissionError:
                # If write fails, it might be because the original is still locked
                # Create a copy in memory and write that instead
                print(" ⚠ Permission issue, creating in-memory copy...")
                adata_copy = adata.copy()
                adata_copy.write_h5ad(temp_data_file)
                adata = adata_copy
                del adata_copy
                gc.collect()

            # Create config
            config = Config.default()
            config.data.max_cells_subset = int(max_cells) if max_cells else None
            config.data.max_genes_subset = int(max_genes) if max_genes else None
            config.paths.processed_data_dir = str(temp_path / "processed")
            config.paths.results_dir = str(results_dir)
            config.create_directories()

            # Run appropriate pipeline
            progress(0.20, desc=" Running analysis pipeline...")
            gr.Info(f" Running {analysis_type} analysis pipeline...", duration=8)
            if analysis_type == "comprehensive":
                pipeline = ComprehensivePipeline(config)
            elif analysis_type == "basic":
                pipeline = BasicPipeline(config)
            elif analysis_type == "multi_chamber" and include_chamber_analysis:
                pipeline = MultiChamberPipeline(config)
            else:
                pipeline = BasicPipeline(config)

            # Pass temporary h5ad file instead of original file
            print(f"=== Running {analysis_type.upper()} HeartMAP Pipeline ===")
            results = pipeline.run(str(temp_data_file), str(results_dir))
            analyzed_adata = results.get('adata', adata)

            progress(0.40, desc="✓ Analysis complete, generating results...")
            gr.Info("✓ Analysis complete! Generating visualizations...", duration=5)

            # ===== CREATE COMPREHENSIVE SUMMARY =====
            n_clusters = analyzed_adata.obs['leiden'].nunique() if 'leiden' in analyzed_adata.obs.columns else 0
            # Per-cell means come from QC columns when the pipeline left them in obs
            # (names as produced by scanpy's QC helpers); otherwise report "N/A"
            # instead of a made-up number.
            mean_genes = _mean_obs_metric(analyzed_adata, ('n_genes_by_counts', 'n_genes'))
            mean_umi = _mean_obs_metric(analyzed_adata, ('total_counts', 'n_counts'))
            summary_data = {
                'Metric': [
                    'Total Cells Analyzed',
                    'Total Genes Analyzed',
                    'Cell Clusters Identified',
                    'Mean Genes per Cell',
                    'Mean UMI per Cell',
                ],
                'Value': [
                    f"{analyzed_adata.n_obs:,}",
                    f"{analyzed_adata.n_vars:,}",
                    str(n_clusters),
                    mean_genes,
                    mean_umi,
                ],
            }

            # ===== CHAMBER ANALYSIS =====
            progress(0.50, desc=" Analyzing chamber distribution...")
            chamber_stats = None
            chamber_correlations = None
            chamber_markers = None

            if include_chamber_analysis and 'chamber' in analyzed_adata.obs.columns:
                gr.Info(" Performing chamber-specific analysis...", duration=5)
                chamber_labels = analyzed_adata.obs['chamber']
                # Filter out Unknown (and missing) chamber labels
                valid_chambers = chamber_labels.notna() & (chamber_labels != 'Unknown')
                n_valid = int(valid_chambers.sum())

                if n_valid == 0:
                    summary_data['Metric'].append('Chamber Analysis')
                    summary_data['Value'].append('⚠ No chamber information available')
                    chamber_counts = pd.Series(dtype=int)  # Empty series
                else:
                    chamber_counts = chamber_labels[valid_chambers].value_counts()
                    # A categorical column keeps unobserved categories (e.g. the
                    # filtered-out 'Unknown') with a count of 0; drop them.
                    chamber_counts = chamber_counts[chamber_counts > 0]
                    chamber_proportions = (chamber_counts / n_valid * 100).round(2)
                    chamber_stats = pd.DataFrame({
                        'Chamber': chamber_counts.index,
                        'Cell Count': chamber_counts.values,
                        'Percentage': chamber_proportions.values
                    })
                    for chamber, count, pct in zip(chamber_counts.index, chamber_counts.values, chamber_proportions.values):
                        summary_data['Metric'].append(f"{chamber} Cell Count")
                        summary_data['Value'].append(f"{count:,} ({pct:.1f}%)")

                # ===== CROSS-CHAMBER CORRELATION ANALYSIS =====
                progress(0.60, desc=" Calculating chamber correlations...")
                print("Calculating cross-chamber gene expression correlations...")
                gr.Info(" Computing cross-chamber correlations...", duration=5)

                if len(chamber_counts) > 1:
                    # Only chambers that survived the filter above take part.
                    chambers = list(chamber_counts.index)

                    # Calculate mean expression per chamber
                    chamber_expr = {}
                    for chamber in chambers:
                        chamber_mask = chamber_labels == chamber
                        X_chamber = _to_dense(analyzed_adata[chamber_mask].X)
                        chamber_expr[chamber] = np.mean(X_chamber, axis=0).flatten()

                    # Calculate pairwise correlations (upper triangle incl. diagonal)
                    corr_data = []
                    for i, chamber1 in enumerate(chambers):
                        for chamber2 in chambers[i:]:
                            corr = np.corrcoef(chamber_expr[chamber1], chamber_expr[chamber2])[0, 1]
                            corr_data.append({
                                'Chamber 1': chamber1,
                                'Chamber 2': chamber2,
                                'Correlation': round(float(corr), 3)
                            })
                    chamber_correlations = pd.DataFrame(corr_data)

                    # Add to summary
                    summary_data['Metric'].append('Cross-Chamber Correlations')
                    summary_data['Value'].append(f"{len(corr_data)} pairs analyzed")

                    # Find highest and lowest correlations
                    non_diagonal = chamber_correlations[chamber_correlations['Chamber 1'] != chamber_correlations['Chamber 2']]
                    if len(non_diagonal) > 0:
                        max_corr = non_diagonal.loc[non_diagonal['Correlation'].idxmax()]
                        min_corr = non_diagonal.loc[non_diagonal['Correlation'].idxmin()]
                        summary_data['Metric'].append('Highest Chamber Correlation')
                        summary_data['Value'].append(f"{max_corr['Chamber 1']}-{max_corr['Chamber 2']} (r={max_corr['Correlation']:.3f})")
                        summary_data['Metric'].append('Lowest Chamber Correlation')
                        summary_data['Value'].append(f"{min_corr['Chamber 1']}-{min_corr['Chamber 2']} (r={min_corr['Correlation']:.3f})")

                    # Identify chamber-specific marker genes
                    print("Identifying chamber-specific marker genes...")
                    marker_genes = {}
                    for chamber in chambers:
                        other_mask = ~(chamber_labels == chamber)
                        X_other = _to_dense(analyzed_adata[other_mask].X)
                        # Per-chamber means were already computed for the correlations.
                        chamber_mean = chamber_expr[chamber]
                        other_mean = np.mean(X_other, axis=0).flatten()
                        # +1 pseudocount avoids division by zero
                        fold_change = np.log2((chamber_mean + 1) / (other_mean + 1))
                        # Get top markers (highest fold change)
                        top_indices = np.argsort(fold_change)[-10:][::-1]  # Top 10
                        marker_genes[chamber] = [
                            (analyzed_adata.var_names[i], fold_change[i]) for i in top_indices
                        ]
                    chamber_markers = marker_genes
                    summary_data['Metric'].append('Chamber-Specific Markers')
                    summary_data['Value'].append(f"{sum(len(m) for m in marker_genes.values())} genes identified")

            # ===== COMMUNICATION HUBS =====
            progress(0.70, desc=" Analyzing communication hubs...")
            hub_stats = None
            if include_communication_hubs:
                print("Communication hubs analysis requested...")
                gr.Info(" Identifying communication hub cell types...", duration=5)
                print(f" Available metadata columns: {list(analyzed_adata.obs.columns)}")

                # Detect cluster column (try multiple common names)
                cluster_col = None
                for col in ['leiden', 'louvain', 'Cluster', 'cluster', 'cell_type', 'celltype']:
                    if col in analyzed_adata.obs.columns:
                        cluster_col = col
                        print(f" Using clustering column: '{cluster_col}'")
                        break

                # Calculate hub scores (expression diversity + signalling potential)
                if cluster_col is not None:
                    hub_scores = []
                    for cell_type in analyzed_adata.obs[cluster_col].unique():
                        cell_mask = analyzed_adata.obs[cluster_col] == cell_type
                        # Get expression data and convert sparse to dense if needed
                        X_subset = _to_dense(analyzed_adata[cell_mask].X)
                        expr_mean = np.mean(X_subset)
                        expr_std = np.std(X_subset)
                        expr_var = np.var(X_subset)
                        hub_score = (expr_std * expr_mean) / (expr_var + 1) if expr_var > 0 else 0
                        hub_scores.append({
                            'Cell Type': f"Cluster {cell_type}",
                            'Hub Score': round(float(hub_score), 4),
                            'Cell Count': int(cell_mask.sum())
                        })
                    hub_stats = pd.DataFrame(hub_scores).sort_values('Hub Score', ascending=False)
                    print(f" ✓ Calculated hub scores for {len(hub_stats)} cell types")
                    summary_data['Metric'].append('Top Communication Hub')
                    top_hub = hub_stats.iloc[0]['Cell Type'] if len(hub_stats) > 0 else "N/A"
                    summary_data['Value'].append(top_hub)
                else:
                    print(" ⚠ No clustering column found (tried: leiden, louvain, Cluster, cluster, cell_type, celltype)")

            summary_df = pd.DataFrame(summary_data)
            summary_df.to_csv(reserved['csv'], index=False)

            # ===== CREATE INTERACTIVE VISUALIZATIONS =====
            progress(0.80, desc=" Creating visualizations...")
            gr.Info(" Generating interactive visualizations...", duration=5)
            viz_chamber_file = None
            viz_hubs_file = None
            viz_corr_file = None
            viz_markers_file = None
            viz_network_file = None
            plot_config = {'displayModeBar': True, 'displaylogo': False}

            # 1. Chamber distribution plot (Interactive Plotly HTML)
            if chamber_stats is not None:
                fig_chamber = px.pie(
                    chamber_stats,
                    values='Cell Count',
                    names='Chamber',
                    title='Chamber Distribution - Interactive',
                    hole=0.3,
                    color_discrete_sequence=px.colors.qualitative.Set3
                )
                fig_chamber.update_traces(
                    textposition='inside',
                    textinfo='percent+label',
                    hovertemplate='<b>%{label}</b><br>Cells: %{value:,}<br>Percentage: %{percent}<extra></extra>'
                )
                fig_chamber.update_layout(
                    width=800,
                    height=600,
                    font=dict(size=14),
                    title_font_size=18,
                    showlegend=True,
                    legend=dict(orientation="v", yanchor="middle", y=0.5, xanchor="left", x=1.05)
                )
                # Written straight to the reserved file; 'cdn' keeps the HTML small.
                viz_chamber_file = reserved['chamber']
                fig_chamber.write_html(viz_chamber_file, include_plotlyjs='cdn', config=plot_config)
                print(f"✓ Created interactive chamber visualization: {viz_chamber_file}")

            # 2. Hub scores plot (Interactive Plotly HTML)
            if hub_stats is not None and len(hub_stats) > 0:
                top_hubs = hub_stats.head(15)  # Show top 15 for better detail
                fig_hubs = px.bar(
                    top_hubs,
                    x='Hub Score',
                    y='Cell Type',
                    title='Communication Hub Scores - Interactive (Top 15)',
                    orientation='h',
                    color='Hub Score',
                    color_continuous_scale='Viridis',
                    labels={'Hub Score': 'Hub Score', 'Cell Type': 'Cell Type'}
                )
                fig_hubs.update_traces(
                    hovertemplate='<b>%{y}</b><br>Hub Score: %{x:.4f}<br>Cell Count: %{customdata[0]:,}<extra></extra>',
                    customdata=top_hubs[['Cell Count']].values
                )
                fig_hubs.update_layout(
                    width=900,
                    height=700,
                    font=dict(size=12),
                    title_font_size=18,
                    xaxis_title='Hub Score (higher = stronger communication hub)',
                    yaxis_title='Cell Type',
                    yaxis={'categoryorder': 'total ascending'}
                )
                viz_hubs_file = reserved['hubs']
                fig_hubs.write_html(viz_hubs_file, include_plotlyjs='cdn', config=plot_config)
                print(f"✓ Created interactive hub scores visualization: {viz_hubs_file}")

            # 3. Cross-Chamber Correlation Matrix (Interactive Plotly HTML)
            if chamber_correlations is not None and len(chamber_correlations) > 0:
                print("Creating cross-chamber correlation matrix...")
                # Rebuild the symmetric matrix from the upper-triangle pair list
                heatmap_chambers = chamber_correlations['Chamber 1'].unique()
                corr_matrix = np.eye(len(heatmap_chambers))
                chamber_to_idx = {ch: i for i, ch in enumerate(heatmap_chambers)}
                for _, row in chamber_correlations.iterrows():
                    i = chamber_to_idx[row['Chamber 1']]
                    j = chamber_to_idx[row['Chamber 2']]
                    corr_matrix[i, j] = row['Correlation']
                    corr_matrix[j, i] = row['Correlation']
                fig_corr = go.Figure(data=go.Heatmap(
                    z=corr_matrix,
                    x=list(heatmap_chambers),
                    y=list(heatmap_chambers),
                    colorscale='RdBu_r',
                    zmid=0.9,
                    zmin=0.85,
                    zmax=1.0,
                    text=corr_matrix,
                    texttemplate='%{text:.3f}',
                    textfont={"size": 14},
                    colorbar=dict(title="Correlation (r)")
                ))
                fig_corr.update_layout(
                    title='Cross-Chamber Gene Expression Correlations',
                    xaxis_title='Chamber',
                    yaxis_title='Chamber',
                    width=700,
                    height=700,
                    font=dict(size=14),
                    title_font_size=18
                )
                viz_corr_file = reserved['corr']
                fig_corr.write_html(viz_corr_file, include_plotlyjs='cdn', config=plot_config)
                print(f"✓ Created cross-chamber correlation matrix: {viz_corr_file}")

            # 4. Chamber-Specific Marker Genes (Interactive Plotly HTML)
            if chamber_markers is not None and len(chamber_markers) > 0:
                print("Creating chamber-specific marker visualization...")
                # Prepare data for grouped bar chart (top 5 per chamber)
                marker_df = pd.DataFrame([
                    {
                        'Chamber': chamber,
                        'Gene': gene,
                        'Log2 Fold Change': round(float(fc), 2)
                    }
                    for chamber, genes in chamber_markers.items()
                    for gene, fc in genes[:5]
                ])
                fig_markers = px.bar(
                    marker_df,
                    x='Gene',
                    y='Log2 Fold Change',
                    color='Chamber',
                    barmode='group',
                    title='Chamber-Specific Marker Genes (Top 5 per Chamber)',
                    labels={'Log2 Fold Change': 'Log2 Fold Change (vs other chambers)'},
                    color_discrete_sequence=px.colors.qualitative.Set2
                )
                fig_markers.update_layout(
                    width=1200,
                    height=600,
                    font=dict(size=12),
                    title_font_size=18,
                    xaxis_title='Marker Gene',
                    yaxis_title='Log2 Fold Change',
                    xaxis={'categoryorder': 'total descending'},
                    hovermode='x unified'
                )
                viz_markers_file = reserved['markers']
                fig_markers.write_html(viz_markers_file, include_plotlyjs='cdn', config=plot_config)
                print(f"✓ Created chamber-specific markers visualization: {viz_markers_file}")

            # 5. Communication Network Graph (Interactive knowledge graph)
            if include_communication_hubs and hub_stats is not None and len(hub_stats) > 0:
                progress(0.90, desc=" Building communication network...")
                gr.Info(" Creating cell-cell communication network...", duration=5)
                print("Creating interactive communication network...")
                viz_network_file = create_communication_network(analyzed_adata, hub_stats, chamber_stats)
                if viz_network_file:
                    print(f"✓ Created interactive communication network: {viz_network_file}")

            # Count total visualizations created
            total_viz = sum(
                1
                for v in (viz_chamber_file, viz_hubs_file, viz_corr_file, viz_markers_file, viz_network_file)
                if v is not None
            )
            print(f"✓ Total interactive visualizations created: {total_viz}")

            # ===== CREATE COMPREHENSIVE OUTPUT MESSAGE =====
            output_msg = f"""
# HeartMAP Analysis Complete!
## Dataset Summary
- **Total Cells Analyzed:** {analyzed_adata.n_obs:,}
- **Total Genes Analyzed:** {analyzed_adata.n_vars:,}
- **Cell Clusters Identified:** {n_clusters}
- **Analysis Type:** {analysis_type.upper()}
"""
            if chamber_stats is not None:
                counts_by_chamber = dict(zip(chamber_stats['Chamber'], chamber_stats['Cell Count']))
                output_msg += f"""
## Chamber-Specific Results
Chamber analysis identified {len(chamber_stats)} distinct chambers:
- **Right Atrium (RA):** {counts_by_chamber.get('RA', 'N/A')} cells
- **Right Ventricle (RV):** {counts_by_chamber.get('RV', 'N/A')} cells
- **Left Atrium (LA):** {counts_by_chamber.get('LA', 'N/A')} cells
- **Left Ventricle (LV):** {counts_by_chamber.get('LV', 'N/A')} cells
"""
            if chamber_correlations is not None and len(chamber_correlations) > 0:
                non_diag = chamber_correlations[chamber_correlations['Chamber 1'] != chamber_correlations['Chamber 2']]
                if len(non_diag) > 0:
                    max_corr = non_diag.loc[non_diag['Correlation'].idxmax()]
                    min_corr = non_diag.loc[non_diag['Correlation'].idxmin()]
                    output_msg += f"""
## Cross-Chamber Correlations
Gene expression correlation analysis:
- **Highest:** {max_corr['Chamber 1']}-{max_corr['Chamber 2']} (r = {max_corr['Correlation']:.3f})
- **Lowest:** {min_corr['Chamber 1']}-{min_corr['Chamber 2']} (r = {min_corr['Correlation']:.3f})
"""
            if chamber_markers is not None:
                total_markers = sum(len(m) for m in chamber_markers.values())
                output_msg += f"""
## Chamber-Specific Markers
Identified **{total_markers} chamber-specific marker genes** across {len(chamber_markers)} chambers
"""
            if hub_stats is not None and len(hub_stats) > 0:
                output_msg += f"""
## Communication Hubs
Top communication hub cells (manuscript range: 0.037-0.047):
- **Top Hub:** {hub_stats.iloc[0]['Cell Type']} (Score: {hub_stats.iloc[0]['Hub Score']:.4f})
- **Range:** {hub_stats['Hub Score'].min():.4f} - {hub_stats['Hub Score'].max():.4f}
Communication hubs coordinate cellular interactions and represent therapeutic targets.
"""
            output_msg += f"""
## Results Available
Summary statistics (CSV) \n
Interactive visualizations ({total_viz} HTML files) \n
Chamber composition analysis \n
Cross-chamber correlation matrix \n
Chamber-specific marker genes \n
Communication hub identification \n
Cell-cell communication network
## Interactive Visualizations
Download the HTML files below for fully interactive analysis!
"""

            # Prepare chamber info text
            chamber_info_text = ""
            if chamber_stats is not None:
                chamber_info_text = chamber_stats.to_string(index=False)
            if chamber_correlations is not None:
                chamber_info_text += "\n\n=== Cross-Chamber Correlations ===\n"
                chamber_info_text += chamber_correlations.to_string(index=False)
            if chamber_markers is not None:
                chamber_info_text += "\n\n=== Chamber-Specific Marker Genes ===\n"
                for chamber, genes in chamber_markers.items():
                    chamber_info_text += f"\n{chamber}: {', '.join([g[0] for g in genes[:5]])}"

            # Save chamber info to file
            chamber_info_file_path = None
            if chamber_info_text:
                with open(reserved['chamber_info'], 'w', encoding='utf-8') as f:
                    f.write("=" * 80 + "\n")
                    f.write("CHAMBER DETAILS & MARKER GENES\n")
                    f.write("=" * 80 + "\n\n")
                    f.write(chamber_info_text)
                chamber_info_file_path = reserved['chamber_info']

            # The network file is produced by a helper at a path of its choosing;
            # copy it to the reserved location while temp_dir still exists.
            if viz_network_file and Path(viz_network_file).exists():
                shutil.copy2(viz_network_file, reserved['network'])
                viz_network_file = reserved['network']
            else:
                viz_network_file = None

            # Drop reserved files that ended up unused so they do not accumulate.
            returned_paths = {
                reserved['csv'],
                viz_chamber_file,
                viz_hubs_file,
                viz_corr_file,
                viz_markers_file,
                viz_network_file,
                chamber_info_file_path,
            }
            _discard_files(p for p in reserved.values() if p not in returned_paths)

            # Final progress update
            progress(1.0, desc=" Analysis complete!")
            gr.Info(" Analysis complete! Download your results below.", duration=8)
            return (
                output_msg,
                reserved['csv'],
                viz_chamber_file,
                viz_hubs_file,
                viz_corr_file,
                viz_markers_file,
                viz_network_file,
                chamber_info_text if chamber_info_text else "No chamber data",
                chamber_info_file_path
            )
    except Exception as e:
        import traceback
        error_type = type(e).__name__
        # Nothing is returned to the UI on failure, so no reserved file is needed.
        _discard_files(reserved.values())
        # Show error notification
        gr.Warning(f"Analysis failed: {error_type}", duration=10)
        # Handle specific error types
        if "ClientDisconnect" in error_type or "ClientDisconnect" in str(e):
            error_msg = """**Upload Interrupted**
The file upload was interrupted. This usually happens when:
- The file is very large (>100MB)
- Network connection is unstable
- Browser tab was closed during upload
**Solutions:**
1. Try uploading a smaller file (<100MB)
2. Ensure stable internet connection
3. Keep this browser tab open during upload
4. For large datasets, consider subsetting your data first
"""
        else:
            error_msg = f"**Error during analysis:**\n\n{str(e)}\n\n**Technical details:**\n```\n{traceback.format_exc()}\n```"
        return _analysis_failure(error_msg)
# ---------------------------------------------------------------------------
# Gradio UI definition
# ---------------------------------------------------------------------------

# Lets the user resize the read-only chamber-details textbox.
_HEARTMAP_UI_CSS = """
.resizable-textbox textarea {
resize: both !important;
overflow: auto !important;
min-height: 200px !important;
max-height: none !important;
}
"""

# Extensions offered by the upload widget, grouped by data family.
_HEARTMAP_UPLOAD_EXTENSIONS = (
    [".h5ad", ".h5mu", ".zarr"]            # AnnData variants
    + [".h5", ".mtx"]                      # 10X Genomics
    + [".loom"]                            # Loom
    + [".csv", ".tsv", ".txt"]             # Text matrices
    + [".tar", ".tar.gz", ".tgz"]          # Archives
    + [".parquet", ".arrow"]               # Columnar
    + [".hdf5"]                            # Generic HDF5
    + [".h5seurat", ".rds", ".rdata"]      # R/Seurat (with conversion guidance)
)

with gr.Blocks(
    title="HeartMAP: Heart Multi-chamber Analysis Platform",
    theme=gr.themes.Soft(),
    analytics_enabled=False,
    css=_HEARTMAP_UI_CSS,
) as demo:
    # Page header: feature overview and supported formats.
    gr.Markdown("""
# HeartMAP: Multi-chamber Heart Analysis
Single-cell RNA-seq analysis for cardiac chamber biology
**Max file size: 10GB** • Large files may take 10-30 minutes
**Analysis Features:**
- Cell type annotation & QC
- Chamber-specific analysis (RA, RV, LA, LV)
- Communication hub identification
- Cross-chamber correlations & marker genes
- Interactive visualizations
**Supported Formats:**
`.h5ad` `.h5` `.h5mu` `.mtx` `.loom` `.csv` `.tsv` `.tar` `.parquet` `.zarr` `.hdf5`
<details><summary>View all formats</summary>
- **Direct:** AnnData (.h5ad, .h5mu, .zarr), 10X (.h5, .mtx), Loom (.loom), Text (.csv/.tsv/.txt), Archives (.tar/.tar.gz), Parquet/Arrow, HDF5
- **Needs conversion:** Seurat (.h5seurat/.rds) → use SeuratDisk | FASTQ/BAM → use Cell Ranger
</details>
---
""")

    with gr.Row():
        # Left column: upload widget and analysis settings.
        with gr.Column(scale=1):
            gr.Markdown("### Upload Data")
            file_input = gr.File(
                label="Upload single-cell data (15+ formats supported)",
                file_types=_HEARTMAP_UPLOAD_EXTENSIONS,
                type="filepath",
            )

            gr.Markdown("### Analysis Settings")
            analysis_type = gr.Dropdown(
                choices=["basic", "comprehensive", "multi_chamber"],
                value="comprehensive",
                label="Analysis Type",
            )

            gr.Markdown("### Advanced Options")
            max_cells = gr.Number(label="Max Cells (for memory optimization)", value=50000, precision=0)
            max_genes = gr.Number(label="Max Genes (for memory optimization)", value=5000, precision=0)

            gr.Markdown("### Chamber Analysis")
            include_chamber = gr.Checkbox(label="Enable chamber-specific analysis", value=True)
            include_hubs = gr.Checkbox(label="Enable communication hub analysis", value=True)

            analyze_btn = gr.Button("Run Analysis", variant="primary", size="lg")

        # Right column: textual summary and downloadable artefacts.
        with gr.Column(scale=2):
            gr.Markdown("### Results & Visualizations")
            output_text = gr.Markdown(
                label="Analysis Summary",
                value="Results will appear here after analysis",
            )

            gr.Markdown("### Downloads")
            output_file = gr.File(label="Download Results CSV", file_types=[".csv"])

            gr.Markdown("""
### Interactive Visualizations
Download the HTML files below and open them in your browser for fully interactive charts:
- **Zoom, pan, and hover** for detailed information
- **Click legend items** to show/hide data
- **Export** as PNG from the chart menu
""")
            viz_file_1 = gr.File(label="Chamber Distribution (Interactive HTML)", file_types=[".html"])
            viz_file_2 = gr.File(label="Communication Hubs (Interactive HTML)", file_types=[".html"])
            viz_file_3 = gr.File(label="Cross-Chamber Correlations (Interactive HTML)", file_types=[".html"])
            viz_file_4 = gr.File(label="Chamber-Specific Markers (Interactive HTML)", file_types=[".html"])
            viz_file_5 = gr.File(label="Communication Network (Interactive HTML)", file_types=[".html"])

            chamber_info = gr.Textbox(
                label="Chamber Details & Marker Genes",
                interactive=False,
                max_lines=50,
                elem_classes="resizable-textbox",
            )
            chamber_info_file = gr.File(
                label="Download Chamber Details & Marker Genes (TXT)",
                file_types=[".txt"],
            )

    # Wire the button to the analysis callback; the order of `outputs` must
    # match the 9-tuple returned by analyze_heart_data.
    _analysis_inputs = [file_input, analysis_type, max_cells, max_genes, include_chamber, include_hubs]
    _analysis_outputs = [
        output_text,
        output_file,
        viz_file_1,
        viz_file_2,
        viz_file_3,
        viz_file_4,
        viz_file_5,
        chamber_info,
        chamber_info_file,
    ]
    analyze_btn.click(fn=analyze_heart_data, inputs=_analysis_inputs, outputs=_analysis_outputs)

    # Footer: usage tips, documentation links and citation.
    gr.Markdown("""
---
### Usage Tips:
| Scenario | Recommendation |
|----------|----------------|
| **First time** | Use "Basic" for quick exploration |
| **Chamber biology** | Enable chamber-specific analysis |
| **Drug targets** | Enable communication hubs |
| **Limited memory** | Reduce cells/genes or use "Basic" |
| **Full analysis** | Use "Comprehensive" mode |
### Documentation:
- **Full User Guide:** https://github.com/Tumo505/HeartMap/blob/master/USER_GUIDE.md
- **API Reference:** https://github.com/Tumo505/HeartMap/blob/master/API_DOCUMENTATION.md
- **GitHub Repository:** https://github.com/Tumo505/HeartMap
- **Python Package:** `pip install heartmap`
### Key References:
- Chamber correlations: RV-LV (r=0.985), LA-LV (r=0.870)
- Hub scores: 0.037-0.047 for atrial cardiomyocytes & adipocytes
- Markers identified: 1,000+ per chamber
- DEGs per pair: 150+ significantly different genes
### Citation:
```
Kgabeng, T., Wang, L., Ngwangwa, H., & Pandelani, T. (2025).
HeartMAP: A Multi-Chamber Spatial Framework for Cardiac Cell-Cell Communication.
Available at: https://github.com/Tumo505/HeartMap
```
""")
if __name__ == "__main__":
    # Queue incoming requests (at most 10 waiting), then start the server with
    # a 10 GB upload cap and errors surfaced to the user.
    queued_app = demo.queue(max_size=10)
    queued_app.launch(
        max_file_size="10gb",
        show_error=True,
    )