Spaces:
Sleeping
Sleeping
Deploy HeartMAP platform
Browse files- README.md +123 -6
- app.py +286 -0
- config.yaml +62 -0
- requirements.txt +14 -0
- src/heartmap.egg-info/PKG-INFO +456 -0
- src/heartmap.egg-info/SOURCES.txt +18 -0
- src/heartmap.egg-info/dependency_links.txt +1 -0
- src/heartmap.egg-info/entry_points.txt +3 -0
- src/heartmap.egg-info/requires.txt +46 -0
- src/heartmap.egg-info/top_level.txt +1 -0
- src/heartmap/__init__.py +22 -0
- src/heartmap/__pycache__/__init__.cpython-310.pyc +0 -0
- src/heartmap/api/__init__.py +259 -0
- src/heartmap/api/__pycache__/__init__.cpython-310.pyc +0 -0
- src/heartmap/config/__init__.py +156 -0
- src/heartmap/config/__pycache__/__init__.cpython-310.pyc +0 -0
- src/heartmap/data/__init__.py +250 -0
- src/heartmap/data/__pycache__/__init__.cpython-310.pyc +0 -0
- src/heartmap/models/__init__.py +486 -0
- src/heartmap/models/__pycache__/__init__.cpython-310.pyc +0 -0
- src/heartmap/pipelines/__init__.py +283 -0
- src/heartmap/pipelines/__pycache__/__init__.cpython-310.pyc +0 -0
- src/heartmap/utils/__init__.py +350 -0
- src/heartmap/utils/__pycache__/__init__.cpython-310.pyc +0 -0
README.md
CHANGED
|
@@ -1,12 +1,129 @@
|
|
| 1 |
---
|
| 2 |
-
title: HeartMAP
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: HeartMAP - Single-Cell Heart Analysis
|
| 3 |
+
emoji: 🫀
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: pink
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.0.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: apache-2.0
|
| 11 |
+
tags:
|
| 12 |
+
- single-cell
|
| 13 |
+
- genomics
|
| 14 |
+
- heart
|
| 15 |
+
- bioinformatics
|
| 16 |
+
- research-tool
|
| 17 |
+
- scanpy
|
| 18 |
+
- cell-communication
|
| 19 |
+
- cardiac-analysis
|
| 20 |
---
|
| 21 |
|
| 22 |
+
# 🫀 HeartMAP: Single-Cell Heart Analysis Platform
|
| 23 |
+
|
| 24 |
+
**Comprehensive analysis of single-cell RNA-seq data from cardiac tissue**
|
| 25 |
+
|
| 26 |
+
## 🔬 What is HeartMAP?
|
| 27 |
+
|
| 28 |
+
HeartMAP is a specialized bioinformatics platform designed for analyzing single-cell RNA sequencing (scRNA-seq) data from cardiac tissue. It provides researchers with tools to:
|
| 29 |
+
|
| 30 |
+
- 🧬 **Identify cell types** in heart tissue across multiple chambers
|
| 31 |
+
- 📊 **Analyze cell-to-cell communication** networks
|
| 32 |
+
- 🏠 **Compare different heart chambers** (RA, RV, LA, LV)
|
| 33 |
+
- 🔍 **Perform quality control** and preprocessing
|
| 34 |
+
- 📈 **Generate publication-ready visualizations**
|
| 35 |
+
|
| 36 |
+
## 🚀 How to Use
|
| 37 |
+
|
| 38 |
+
1. **Upload your data**: Provide a `.h5ad` (AnnData) file containing single-cell RNA-seq counts
|
| 39 |
+
2. **Choose analysis type**:
|
| 40 |
+
- **Basic**: Fast clustering and cell type identification
|
| 41 |
+
- **Comprehensive**: Full pipeline with communication analysis
|
| 42 |
+
3. **Set parameters**: Adjust maximum cells for your computational needs
|
| 43 |
+
4. **Run analysis**: Get results in minutes with interactive visualizations
|
| 44 |
+
5. **Download results**: Get CSV files with detailed analysis results
|
| 45 |
+
|
| 46 |
+
## 📊 Input Data Format
|
| 47 |
+
|
| 48 |
+
HeartMAP expects **AnnData (.h5ad) files** with:
|
| 49 |
+
- **Gene expression counts** (cells × genes matrix)
|
| 50 |
+
- Optional: **chamber annotations** in `.obs['chamber']`
|
| 51 |
+
- Optional: **existing cell type labels** in `.obs['cell_type']`
|
| 52 |
+
|
| 53 |
+
## 🎯 Analysis Pipeline
|
| 54 |
+
|
| 55 |
+
### Basic Analysis
|
| 56 |
+
- Quality control filtering
|
| 57 |
+
- Normalization and scaling
|
| 58 |
+
- Dimensionality reduction (PCA, UMAP)
|
| 59 |
+
- Leiden clustering
|
| 60 |
+
- Basic cell type annotation
|
| 61 |
+
|
| 62 |
+
### Comprehensive Analysis
|
| 63 |
+
- All basic analysis steps
|
| 64 |
+
- Advanced cell type identification
|
| 65 |
+
- Cell-to-cell communication analysis
|
| 66 |
+
- Multi-chamber comparative analysis
|
| 67 |
+
- Communication network visualization
|
| 68 |
+
- Statistical testing
|
| 69 |
+
|
| 70 |
+
## 📈 Output
|
| 71 |
+
|
| 72 |
+
- **Interactive visualizations**: UMAP plots, quality metrics
|
| 73 |
+
- **Summary statistics**: Cell counts, gene expression metrics
|
| 74 |
+
- **Downloadable results**: CSV files with cell annotations and metrics
|
| 75 |
+
- **Quality control plots**: Distribution of key metrics
|
| 76 |
+
|
| 77 |
+
## 🔬 Research Applications
|
| 78 |
+
|
| 79 |
+
HeartMAP is designed for researchers studying:
|
| 80 |
+
|
| 81 |
+
- **Cardiac development** and maturation
|
| 82 |
+
- **Heart disease mechanisms** (heart failure, cardiomyopathy)
|
| 83 |
+
- **Therapeutic responses** to treatments
|
| 84 |
+
- **Cell-type specific** gene expression patterns
|
| 85 |
+
- **Inter-chamber differences** in cellular composition
|
| 86 |
+
- **Cell communication networks** in healthy and diseased hearts
|
| 87 |
+
|
| 88 |
+
## 📚 Citation
|
| 89 |
+
|
| 90 |
+
If you use HeartMAP in your research, please cite:
|
| 91 |
+
|
| 92 |
+
```bibtex
|
| 93 |
+
@software{heartmap2024,
|
| 94 |
+
title={HeartMAP: A comprehensive platform for single-cell cardiac analysis},
|
| 95 |
+
author={Kgabeng, Tumo and Wang, Lulu and Ngwangwa, Harry and Pandelani, Thanyani},
|
| 96 |
+
year={2024},
|
| 97 |
+
url={https://github.com/Tumo505/GRAIL-Heart-cell-cell-communication}
|
| 98 |
+
}
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
## 🔗 Links
|
| 102 |
+
|
| 103 |
+
- **GitHub Repository**: [GRAIL-Heart-cell-cell-communication](https://github.com/Tumo505/GRAIL-Heart-cell-cell-communication)
|
| 104 |
+
- **Documentation**: See repository README for detailed usage
|
| 105 |
+
- **Issues & Support**: Report issues on GitHub
|
| 106 |
+
|
| 107 |
+
## ⚠️ Disclaimer
|
| 108 |
+
|
| 109 |
+
HeartMAP is a research tool. Results should be validated with additional experiments and domain expertise. This platform is designed to assist researchers but does not replace careful biological interpretation and validation.
|
| 110 |
+
|
| 111 |
+
## 🏷️ Technical Details
|
| 112 |
+
|
| 113 |
+
- **Framework**: Built with scanpy, pandas, and scikit-learn
|
| 114 |
+
- **Interface**: Gradio web application
|
| 115 |
+
- **Input**: AnnData (.h5ad) format
|
| 116 |
+
- **Analysis**: Statistical and computational biology methods
|
| 117 |
+
- **Visualization**: matplotlib, seaborn, plotly
|
| 118 |
+
|
| 119 |
+
## 🤝 Contributing
|
| 120 |
+
|
| 121 |
+
We welcome contributions! Please see our GitHub repository for:
|
| 122 |
+
- Bug reports and feature requests
|
| 123 |
+
- Code contributions
|
| 124 |
+
- Documentation improvements
|
| 125 |
+
- Example datasets and use cases
|
| 126 |
+
|
| 127 |
+
---
|
| 128 |
+
|
| 129 |
+
**Note**: This is academic research software. Always validate computational results with experimental evidence and consult domain experts for biological interpretation.
|
app.py
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HeartMAP: Single-Cell Heart Analysis Platform
|
| 3 |
+
Hugging Face Spaces Deployment
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
import tempfile
|
| 10 |
+
import shutil
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
import traceback
|
| 13 |
+
|
| 14 |
+
# Add src to path for imports
|
| 15 |
+
sys.path.insert(0, 'src')
|
| 16 |
+
|
| 17 |
+
from heartmap import Config, HeartMapModel
|
| 18 |
+
from heartmap.pipelines import ComprehensivePipeline, BasicPipeline
|
| 19 |
+
from heartmap.data import DataLoader
|
| 20 |
+
import scanpy as sc
|
| 21 |
+
import pandas as pd
|
| 22 |
+
import matplotlib.pyplot as plt
|
| 23 |
+
import seaborn as sns
|
| 24 |
+
|
| 25 |
+
# Set up scanpy settings
|
| 26 |
+
sc.settings.verbosity = 1 # Reduce verbosity
|
| 27 |
+
sc.settings.set_figure_params(dpi=80, facecolor='white')
|
| 28 |
+
|
| 29 |
+
def _obs_mean(obs, column, spec):
    """Return the mean of ``obs[column]`` formatted with ``spec``, or ``'N/A'`` if the column is absent."""
    if column not in obs:
        return "N/A"
    return format(obs[column].mean(), spec)


def _build_summary(adata, analysis_type):
    """Build the markdown summary shown in the results panel."""
    obs = adata.obs
    n_clusters = len(obs['leiden'].unique()) if 'leiden' in obs else 'N/A'

    if 'chamber' in obs:
        # Chamber labels are not guaranteed to be strings; str() keeps join() from raising.
        chambers = ', '.join(str(label) for label in obs['chamber'].unique())
        chamber_line = f"- Chamber information available: {chambers}"
    else:
        chamber_line = "- Single chamber analysis"

    # QC columns are optional (the plotting code already treats them so);
    # report 'N/A' instead of failing the whole analysis with a KeyError.
    mt_mean = _obs_mean(obs, 'pct_counts_mt', '.1f')
    mt_text = mt_mean if mt_mean == "N/A" else f"{mt_mean}%"

    lines = [
        "## 🎉 Analysis Complete!",
        "",
        "### 📊 Dataset Summary:",
        f"- **Cells analyzed**: {adata.n_obs:,}",
        f"- **Genes analyzed**: {adata.n_vars:,}",
        f"- **Cell types identified**: {n_clusters}",
        f"- **Analysis type**: {analysis_type}",
        "",
        "### 🔬 Key Findings:",
        "- Clustering performed using Leiden algorithm",
        "- UMAP embedding computed for visualization",
        chamber_line,
        "",
        "### 📈 Quality Metrics:",
        f"- **Mean genes per cell**: {_obs_mean(obs, 'n_genes', '.0f')}",
        f"- **Mean counts per cell**: {_obs_mean(obs, 'total_counts', '.0f')}",
        f"- **Mitochondrial gene %**: {mt_text}",
        "",
        "### 🎯 Next Steps:",
        "1. Download detailed results (CSV files)",
        "2. Examine cell type markers",
        "3. Explore communication networks",
        "4. Compare with your research questions",
    ]
    return "\n".join(lines)


def _plot_overview(adata, plot_file):
    """Render the 2x2 overview figure (UMAP, QC histograms, composition) to ``plot_file``."""
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    try:
        fig.suptitle('HeartMAP Analysis Results', fontsize=16, fontweight='bold')
        obs = adata.obs

        # UMAP plot
        umap_ax = axes[0, 0]
        if 'X_umap' in adata.obsm:
            # Colour by cluster only when cluster labels exist.
            cluster_color = 'leiden' if 'leiden' in obs else None
            sc.pl.umap(adata, color=cluster_color, ax=umap_ax, show=False, frameon=False)
            umap_ax.set_title('Cell Clusters (UMAP)')
        else:
            umap_ax.text(0.5, 0.5, 'UMAP not available', ha='center', va='center')
            umap_ax.set_title('UMAP Visualization')

        # Genes per cell
        if 'n_genes' in obs:
            genes_ax = axes[0, 1]
            genes_ax.hist(obs['n_genes'], bins=50, alpha=0.7, color='skyblue')
            genes_ax.set_xlabel('Number of genes')
            genes_ax.set_ylabel('Number of cells')
            genes_ax.set_title('Genes per Cell Distribution')

        # Mitochondrial gene percentage
        if 'pct_counts_mt' in obs:
            mt_ax = axes[1, 0]
            mt_ax.hist(obs['pct_counts_mt'], bins=50, alpha=0.7, color='lightcoral')
            mt_ax.set_xlabel('Mitochondrial gene %')
            mt_ax.set_ylabel('Number of cells')
            mt_ax.set_title('Mitochondrial Gene Expression')

        # Chamber distribution when available, otherwise the largest clusters
        comp_ax = axes[1, 1]
        if 'chamber' in obs:
            chamber_counts = obs['chamber'].value_counts()
            comp_ax.pie(chamber_counts.values, labels=chamber_counts.index, autopct='%1.1f%%')
            comp_ax.set_title('Chamber Distribution')
        elif 'leiden' in obs:
            cluster_counts = obs['leiden'].value_counts().head(10)
            positions = range(len(cluster_counts))
            comp_ax.bar(positions, cluster_counts.values, color='lightgreen')
            # value_counts() sorts by size, so label bars with the actual
            # cluster ids rather than their positions 0..9.
            comp_ax.set_xticks(list(positions))
            comp_ax.set_xticklabels([str(label) for label in cluster_counts.index])
            comp_ax.set_xlabel('Cluster')
            comp_ax.set_ylabel('Number of cells')
            comp_ax.set_title('Top 10 Clusters')

        fig.tight_layout()
        fig.savefig(plot_file, dpi=150, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting fails part-way.
        plt.close(fig)


def _export_cell_table(adata, csv_file):
    """Write the per-cell annotation table to ``csv_file``."""
    obs = adata.obs
    defaults = {
        'cluster': ('leiden', 'Unknown'),
        'n_genes': ('n_genes', 0),
        'total_counts': ('total_counts', 0),
        'pct_counts_mt': ('pct_counts_mt', 0),
        'chamber': ('chamber', 'Unknown'),
    }
    table = pd.DataFrame({'cell_id': obs.index.to_numpy()})
    for out_name, (obs_name, fallback) in defaults.items():
        # Missing columns are filled with a constant placeholder.
        table[out_name] = obs[obs_name].to_numpy() if obs_name in obs else fallback
    table.to_csv(csv_file, index=False)


def analyze_heart_data(file, analysis_type, max_cells):
    """
    Main analysis function for the Gradio interface.

    Args:
        file: Uploaded ``.h5ad`` file. Either a path string or an object whose
            ``.name`` attribute is the path (which one depends on the Gradio
            version / ``gr.File`` ``type`` setting), or ``None`` when nothing
            was uploaded.
        analysis_type: ``"Basic Analysis"`` selects ``BasicPipeline``; any other
            value selects ``ComprehensivePipeline``.
        max_cells: Upper bound on cells to analyse. The gene subset limit is
            derived from it as ``min(5000, max_cells // 2)``.

    Returns:
        A ``(markdown_text, plot_path, csv_path)`` tuple. On a missing upload or
        any failure the two paths are ``None`` and the text describes the
        problem; exceptions are reported in the text, never raised.
    """
    if file is None:
        return "❌ Please upload a .h5ad file", None, None

    output_dir = None
    try:
        upload_path = getattr(file, "name", file)

        # Scratch space for the input copy and pipeline artefacts; removed as
        # soon as the analysis is done.
        with tempfile.TemporaryDirectory() as work_dir:
            work_path = Path(work_dir)

            # Save uploaded file
            input_file = work_path / "input_data.h5ad"
            shutil.copy(upload_path, input_file)

            # Create configuration
            config = Config.default()
            config.data.max_cells_subset = int(max_cells)
            config.data.max_genes_subset = min(5000, int(max_cells // 2))
            config.data.test_mode = True
            config.update_paths(str(work_path))
            config.create_directories()

            # Create pipeline based on analysis type
            if analysis_type == "Basic Analysis":
                pipeline = BasicPipeline(config)
            else:
                pipeline = ComprehensivePipeline(config)

            # Run analysis
            results = pipeline.run(str(input_file), str(work_path / "results"))
            adata = results['adata']

            summary = _build_summary(adata, analysis_type)

            # The returned files must outlive this function so Gradio can serve
            # them. Writing them into the TemporaryDirectory above would delete
            # them on return, so they go to a separate directory that is only
            # removed if the analysis fails.
            output_dir = Path(tempfile.mkdtemp(prefix="heartmap_results_"))
            plot_file = output_dir / "analysis_results.png"
            results_csv = output_dir / "heartmap_results.csv"
            _plot_overview(adata, plot_file)
            _export_cell_table(adata, results_csv)

        return summary, str(plot_file), str(results_csv)

    except Exception as e:
        # UI boundary: report every failure to the user instead of crashing the app.
        if output_dir is not None:
            shutil.rmtree(output_dir, ignore_errors=True)
        error_msg = "\n".join([
            "## ❌ Analysis Error",
            "",
            f"**Error**: {str(e)}",
            "",
            "**Possible solutions**:",
            "1. Ensure your file is a valid .h5ad (AnnData) format",
            "2. Try reducing the maximum number of cells",
            "3. Check that your data contains gene expression counts",
            "4. Verify the file is not corrupted",
            "",
            "**Error details**:",
            "```",
            traceback.format_exc(),
            "```",
        ])
        return error_msg, None, None
|
| 170 |
+
|
| 171 |
+
def create_demo_interface():
    """Assemble the Gradio Blocks UI and return it without launching.

    Layout: an introduction, an input column (file upload, analysis type,
    cell limit, start button) beside a results column (summary text, plot
    image, CSV download), followed by usage notes and a citation footer.
    The start button is wired to ``analyze_heart_data``.
    """
    soft_theme = gr.themes.Soft()

    with gr.Blocks(theme=soft_theme, title="HeartMAP: Heart Cell Communication Analysis") as demo:
        # Introduction
        gr.Markdown("""
        # 🫀 HeartMAP: Single-Cell Heart Analysis Platform

        **Analyze single-cell RNA-seq data from cardiac tissue across multiple heart chambers**

        Upload your .h5ad file to get started with comprehensive heart cell analysis including:
        - 🔬 Cell type identification and clustering
        - 📊 Quality control and preprocessing
        - 🗣️ Cell-to-cell communication analysis
        - 🏠 Multi-chamber comparative analysis
        - 📈 Advanced visualizations

        ## 📚 About HeartMAP
        HeartMAP is a comprehensive platform for analyzing single-cell RNA sequencing data from cardiac tissue.
        It provides tools for cell annotation, communication analysis, and multi-chamber comparison studies.

        **Example datasets**: Use 10X Genomics h5ad files or any AnnData-compatible single-cell data.
        """)

        with gr.Row():
            # Left column: inputs
            with gr.Column(scale=1):
                gr.Markdown("## 📤 Upload Data")
                upload_box = gr.File(
                    label="Upload .h5ad file",
                    file_types=[".h5ad"],
                    file_count="single"
                )

                mode_picker = gr.Dropdown(
                    choices=["Basic Analysis", "Comprehensive Analysis"],
                    value="Basic Analysis",
                    label="Analysis Type",
                    info="Basic: Fast clustering and annotation. Comprehensive: Full pipeline with communication analysis."
                )

                cell_limit = gr.Slider(
                    minimum=1000,
                    maximum=20000,
                    value=5000,
                    step=1000,
                    label="Maximum Cells to Analyze",
                    info="Reduce for faster analysis or if you have memory constraints"
                )

                start_button = gr.Button("🚀 Start Analysis", variant="primary", size="lg")

            # Right column: outputs
            with gr.Column(scale=2):
                gr.Markdown("## 📊 Results")
                summary_view = gr.Markdown(value="Upload a file and click 'Start Analysis' to begin...")
                plot_view = gr.Image(label="Analysis Visualization")
                download_view = gr.File(label="Download Results (CSV)")

        # Usage notes
        with gr.Row():
            gr.Markdown("""
            ## 📋 Example Analyses

            ### What HeartMAP can analyze:
            - **Healthy heart tissue** from multiple chambers (RA, RV, LA, LV)
            - **Disease models** (heart failure, cardiomyopathy, etc.)
            - **Development studies** (embryonic, postnatal heart development)
            - **Treatment responses** (drug effects, therapeutic interventions)

            ### Expected file format:
            - **AnnData (.h5ad)** format with gene expression counts
            - **Cells as observations** (rows) and **genes as variables** (columns)
            - Optional: **chamber annotations** in `.obs['chamber']`
            - Optional: **cell type annotations** in `.obs['cell_type']`

            ### Analysis outputs:
            - **Cell clustering** and UMAP visualization
            - **Quality control** metrics and filtering
            - **Cell type identification** using marker genes
            - **Communication networks** between cell types
            - **Chamber-specific** analysis and comparisons
            """)

        # Run the analysis when the button is clicked
        start_button.click(
            fn=analyze_heart_data,
            inputs=[upload_box, mode_picker, cell_limit],
            outputs=[summary_view, plot_view, download_view],
            show_progress=True
        )

        # Citation and links
        gr.Markdown("""
        ---
        ### 📖 Citation
        If you use HeartMAP in your research, please cite:
        ```
        HeartMAP: A comprehensive platform for single-cell cardiac analysis
        https://github.com/Tumo505/GRAIL-Heart-cell-cell-communication
        ```

        ### 🔗 Links
        - **GitHub**: [HeartMAP Repository](https://github.com/Tumo505/GRAIL-Heart-cell-cell-communication)
        - **Documentation**: See README for detailed usage instructions
        - **Issues**: Report bugs or request features on GitHub

        **Note**: This is a research tool. Results should be validated with additional experiments.
        """)

    return demo
|
| 279 |
+
|
| 280 |
+
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all interfaces at
    # port 7860 without creating a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo = create_demo_interface()
    demo.launch(**launch_options)
|
config.yaml
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HeartMAP Configuration for Hugging Face Deployment
|
| 2 |
+
# Optimized for cloud environment with memory constraints
|
| 3 |
+
|
| 4 |
+
data:
|
| 5 |
+
# File paths (will be set dynamically)
|
| 6 |
+
raw_data_dir: "data/raw"
|
| 7 |
+
processed_data_dir: "data/processed"
|
| 8 |
+
|
| 9 |
+
# Processing limits (optimized for cloud)
|
| 10 |
+
max_cells_subset: 10000 # Reduced for cloud deployment
|
| 11 |
+
max_genes_subset: 5000 # Reduced for faster processing
|
| 12 |
+
min_genes: 200 # Minimum genes per cell
|
| 13 |
+
min_cells: 3 # Minimum cells per gene
|
| 14 |
+
max_pct_mt: 25 # Maximum mitochondrial gene percentage
|
| 15 |
+
|
| 16 |
+
# Performance settings
|
| 17 |
+
test_mode: true # Enable for faster demo processing
|
| 18 |
+
use_raw_counts: true
|
| 19 |
+
backup_processed: false # Disable to save space
|
| 20 |
+
|
| 21 |
+
analysis:
|
| 22 |
+
# Clustering parameters
|
| 23 |
+
resolution: 0.5 # Leiden clustering resolution
|
| 24 |
+
n_neighbors: 15 # Reduced for speed
|
| 25 |
+
n_pcs: 30 # Reduced principal components
|
| 26 |
+
|
| 27 |
+
# Marker gene analysis
|
| 28 |
+
n_marker_genes: 20 # Reduced for cloud deployment
|
| 29 |
+
min_fold_change: 1.5
|
| 30 |
+
max_adj_pvalue: 0.05
|
| 31 |
+
|
| 32 |
+
# Visualization
|
| 33 |
+
embedding_method: "umap"
|
| 34 |
+
color_palette: "tab10"
|
| 35 |
+
|
| 36 |
+
model:
|
| 37 |
+
# Model settings
|
| 38 |
+
save_intermediate: false # Disable to save space
|
| 39 |
+
use_gpu: false # CPU only for cloud
|
| 40 |
+
batch_size: 1000 # Smaller batches for memory
|
| 41 |
+
random_state: 42
|
| 42 |
+
|
| 43 |
+
# Feature selection
|
| 44 |
+
highly_variable_genes: 2000 # Reduced for speed
|
| 45 |
+
flavor: "seurat_v3"
|
| 46 |
+
|
| 47 |
+
communication:
|
| 48 |
+
# Cell communication analysis
|
| 49 |
+
min_expression_threshold: 0.1
|
| 50 |
+
pvalue_threshold: 0.05
|
| 51 |
+
top_interactions: 50 # Reduced for demo
|
| 52 |
+
|
| 53 |
+
paths:
|
| 54 |
+
# Output directories (will be set dynamically)
|
| 55 |
+
base_dir: "."
|
| 56 |
+
results_dir: "results"
|
| 57 |
+
figures_dir: "figures"
|
| 58 |
+
logs_dir: "logs"
|
| 59 |
+
|
| 60 |
+
# Model persistence
|
| 61 |
+
models_dir: "models"
|
| 62 |
+
cache_dir: "cache"
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scanpy>=1.9.0
|
| 2 |
+
pandas>=1.5.0
|
| 3 |
+
numpy>=1.21.0
|
| 4 |
+
scipy>=1.9.0
|
| 5 |
+
scikit-learn>=1.1.0
|
| 6 |
+
matplotlib>=3.5.0
|
| 7 |
+
seaborn>=0.11.0
|
| 8 |
+
anndata>=0.8.0
|
| 9 |
+
plotly>=5.0.0
|
| 10 |
+
networkx>=2.8.0
|
| 11 |
+
tqdm
|
| 12 |
+
statsmodels
|
| 13 |
+
pyyaml>=6.0
|
| 14 |
+
gradio>=4.0.0
|
src/heartmap.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,456 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: heartmap
|
| 3 |
+
Version: 1.0.0
|
| 4 |
+
Summary: Heart Multi-chamber Analysis Platform for single-cell RNA-seq
|
| 5 |
+
Home-page: https://github.com/Tumo505/HeartMap
|
| 6 |
+
Author: Tumo Kgabeng, Lulu Wang, Harry Ngwangwa, Thanyani Pandelani
|
| 7 |
+
Author-email: 28346416@mylife.unisa.ac.za
|
| 8 |
+
Project-URL: Documentation, https://github.com/Tumo505/HeartMap/wiki
|
| 9 |
+
Project-URL: Source, https://github.com/Tumo505/HeartMap
|
| 10 |
+
Project-URL: Tracker, https://github.com/Tumo505/HeartMap/issues
|
| 11 |
+
Keywords: single-cell,RNA-seq,heart,cell-communication,bioinformatics,spatial-transcriptomics
|
| 12 |
+
Classifier: Development Status :: 4 - Beta
|
| 13 |
+
Classifier: Intended Audience :: Science/Research
|
| 14 |
+
Classifier: License :: OSI Approved :: Apache Software License
|
| 15 |
+
Classifier: Operating System :: OS Independent
|
| 16 |
+
Classifier: Programming Language :: Python :: 3
|
| 17 |
+
Classifier: Programming Language :: Python :: 3.8
|
| 18 |
+
Classifier: Programming Language :: Python :: 3.9
|
| 19 |
+
Classifier: Programming Language :: Python :: 3.10
|
| 20 |
+
Classifier: Programming Language :: Python :: 3.11
|
| 21 |
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
| 22 |
+
Requires-Python: >=3.8
|
| 23 |
+
Description-Content-Type: text/markdown
|
| 24 |
+
License-File: LICENSE
|
| 25 |
+
License-File: NOTICE
|
| 26 |
+
Requires-Dist: scanpy>=1.9.0
|
| 27 |
+
Requires-Dist: pandas>=1.5.0
|
| 28 |
+
Requires-Dist: numpy>=1.21.0
|
| 29 |
+
Requires-Dist: scipy>=1.9.0
|
| 30 |
+
Requires-Dist: scikit-learn>=1.1.0
|
| 31 |
+
Requires-Dist: matplotlib>=3.5.0
|
| 32 |
+
Requires-Dist: seaborn>=0.11.0
|
| 33 |
+
Requires-Dist: anndata>=0.8.0
|
| 34 |
+
Requires-Dist: plotly>=5.0.0
|
| 35 |
+
Requires-Dist: networkx>=2.8.0
|
| 36 |
+
Requires-Dist: tqdm
|
| 37 |
+
Requires-Dist: statsmodels
|
| 38 |
+
Provides-Extra: communication
|
| 39 |
+
Requires-Dist: liana>=0.1.0; extra == "communication"
|
| 40 |
+
Requires-Dist: cellphonedb>=3.0.0; extra == "communication"
|
| 41 |
+
Requires-Dist: omnipath>=1.0.0; extra == "communication"
|
| 42 |
+
Provides-Extra: api
|
| 43 |
+
Requires-Dist: fastapi>=0.100.0; extra == "api"
|
| 44 |
+
Requires-Dist: uvicorn>=0.23.0; extra == "api"
|
| 45 |
+
Requires-Dist: pydantic>=2.0.0; extra == "api"
|
| 46 |
+
Provides-Extra: dev
|
| 47 |
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
| 48 |
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
| 49 |
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
| 50 |
+
Requires-Dist: flake8>=6.0.0; extra == "dev"
|
| 51 |
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
| 52 |
+
Requires-Dist: jupyter>=1.0.0; extra == "dev"
|
| 53 |
+
Requires-Dist: notebook>=6.0.0; extra == "dev"
|
| 54 |
+
Provides-Extra: all
|
| 55 |
+
Requires-Dist: liana>=0.1.0; extra == "all"
|
| 56 |
+
Requires-Dist: cellphonedb>=3.0.0; extra == "all"
|
| 57 |
+
Requires-Dist: omnipath>=1.0.0; extra == "all"
|
| 58 |
+
Requires-Dist: fastapi>=0.100.0; extra == "all"
|
| 59 |
+
Requires-Dist: uvicorn>=0.23.0; extra == "all"
|
| 60 |
+
Requires-Dist: pydantic>=2.0.0; extra == "all"
|
| 61 |
+
Requires-Dist: pytest>=7.0.0; extra == "all"
|
| 62 |
+
Requires-Dist: pytest-cov>=4.0.0; extra == "all"
|
| 63 |
+
Requires-Dist: black>=23.0.0; extra == "all"
|
| 64 |
+
Requires-Dist: flake8>=6.0.0; extra == "all"
|
| 65 |
+
Requires-Dist: mypy>=1.0.0; extra == "all"
|
| 66 |
+
Requires-Dist: jupyter>=1.0.0; extra == "all"
|
| 67 |
+
Requires-Dist: notebook>=6.0.0; extra == "all"
|
| 68 |
+
Dynamic: author
|
| 69 |
+
Dynamic: author-email
|
| 70 |
+
Dynamic: classifier
|
| 71 |
+
Dynamic: description
|
| 72 |
+
Dynamic: description-content-type
|
| 73 |
+
Dynamic: home-page
|
| 74 |
+
Dynamic: keywords
|
| 75 |
+
Dynamic: license-file
|
| 76 |
+
Dynamic: project-url
|
| 77 |
+
Dynamic: provides-extra
|
| 78 |
+
Dynamic: requires-dist
|
| 79 |
+
Dynamic: requires-python
|
| 80 |
+
Dynamic: summary
|
| 81 |
+
|
| 82 |
+
# HeartMAP: Heart Multi-chamber Analysis Platform
|
| 83 |
+
|
| 84 |
+
[License: Apache 2.0](https://opensource.org/licenses/Apache-2.0)
|
| 85 |
+
[Python downloads](https://www.python.org/downloads/)
|
| 86 |
+
[DOI: 10.5281/zenodo.16745118](https://doi.org/10.5281/zenodo.16745118)
|
| 87 |
+
|
| 88 |
+
## 🫀 Project Overview
|
| 89 |
+
|
| 90 |
+
HeartMAP is a comprehensive, modular analysis platform for mapping cell-cell communication across all four chambers of the human heart using single-cell RNA sequencing data. The platform integrates multiple analysis pipelines and machine learning models to provide insights into chamber-specific biology, cross-chamber signaling, and therapeutic targets.
|
| 91 |
+
|
| 92 |
+
### **New Refactored Architecture!**
|
| 93 |
+
|
| 94 |
+
This version has been completely refactored for:
|
| 95 |
+
- ✅ **Deployment Ready**: Easy installation and deployment on any platform
|
| 96 |
+
- ✅ **Hugging Face Compatible**: Ready for Hugging Face Spaces deployment
|
| 97 |
+
- ✅ **Modular Design**: Reusable components and clear separation of concerns
|
| 98 |
+
- ✅ **API Interface**: REST API and CLI for integration with other tools
|
| 99 |
+
- ✅ **Scalable**: Configurable memory usage and performance optimization
|
| 100 |
+
- ✅ **Reproducible**: Comprehensive configuration management and testing
|
| 101 |
+
|
| 102 |
+
## 🚀 Quick Start
|
| 103 |
+
|
| 104 |
+
### Installation
|
| 105 |
+
|
| 106 |
+
```bash
|
| 107 |
+
# Basic installation
|
| 108 |
+
pip install heartmap
|
| 109 |
+
|
| 110 |
+
# Full installation with all features
|
| 111 |
+
pip install heartmap[all]
|
| 112 |
+
|
| 113 |
+
# Development installation
|
| 114 |
+
git clone https://github.com/Tumo505/HeartMap.git
|
| 115 |
+
cd HeartMap
|
| 116 |
+
pip install -e .[all]
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
### Usage
|
| 120 |
+
|
| 121 |
+
#### Command Line Interface
|
| 122 |
+
```bash
|
| 123 |
+
# Run comprehensive analysis
|
| 124 |
+
heartmap data/raw/healthy_human_4chamber_map_unnormalized_V3.h5ad --analysis-type comprehensive --output-dir results
|
| 125 |
+
|
| 126 |
+
# Run basic analysis only
|
| 127 |
+
heartmap data/raw/healthy_human_4chamber_map_unnormalized_V3.h5ad --analysis-type basic --output-dir results/basic
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
#### Python API
|
| 131 |
+
```python
|
| 132 |
+
from heartmap import Config, HeartMapModel
|
| 133 |
+
from heartmap.pipelines import ComprehensivePipeline
|
| 134 |
+
|
| 135 |
+
# Create configuration
|
| 136 |
+
config = Config.default()
|
| 137 |
+
config.data.max_cells_subset = 50000 # Optimize for your memory
|
| 138 |
+
|
| 139 |
+
# Run comprehensive analysis
|
| 140 |
+
pipeline = ComprehensivePipeline(config)
|
| 141 |
+
results = pipeline.run("data/raw/healthy_human_4chamber_map_unnormalized_V3.h5ad", "results/")
|
| 142 |
+
|
| 143 |
+
# Or use individual models
|
| 144 |
+
model = HeartMapModel(config)
|
| 145 |
+
model.fit(adata)
|
| 146 |
+
predictions = model.predict(adata)
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
#### REST API
|
| 150 |
+
```bash
|
| 151 |
+
# Start API server
|
| 152 |
+
heartmap-api --config config.yaml
|
| 153 |
+
|
| 154 |
+
# Use API
|
| 155 |
+
curl -X POST "http://localhost:8000/analyze" \
|
| 156 |
+
-H "Content-Type: multipart/form-data" \
|
| 157 |
+
-F "file=@your_data.h5ad" \
|
| 158 |
+
-F "analysis_type=comprehensive"
|
| 159 |
+
```
|
| 160 |
+
|
| 161 |
+
## 📁 New Project Structure
|
| 162 |
+
|
| 163 |
+
```
|
| 164 |
+
HeartMAP/
|
| 165 |
+
├── src/
|
| 166 |
+
│ └── heartmap/ # Main package
|
| 167 |
+
│ ├── config/ # Configuration management
|
| 168 |
+
│ ├── data/ # Data processing utilities
|
| 169 |
+
│ ├── models/ # Analysis models
|
| 170 |
+
│ ├── pipelines/ # Analysis pipelines
|
| 171 |
+
│ ├── utils/ # Utility functions
|
| 172 |
+
│ └── api/ # API interfaces
|
| 173 |
+
├── scripts/ # Example scripts and tools
|
| 174 |
+
│ ├── run_examples.py # Usage examples
|
| 175 |
+
│ ├── migrate.py # Migration from legacy
|
| 176 |
+
│ └── legacy/ # Original analysis scripts
|
| 177 |
+
├── notebooks/ # Jupyter notebooks
|
| 178 |
+
├── tests/ # Test suite
|
| 179 |
+
├── config.yaml # Default configuration
|
| 180 |
+
├── setup.py # Package setup
|
| 181 |
+
└── requirements-dev.txt # Development dependencies
|
| 182 |
+
```
|
| 183 |
+
|
| 184 |
+
## 🔬 Analysis Components
|
| 185 |
+
|
| 186 |
+
### 1. **Basic Pipeline**
|
| 187 |
+
- Data preprocessing and quality control
|
| 188 |
+
- Cell type annotation and clustering
|
| 189 |
+
- Basic visualization and export
|
| 190 |
+
|
| 191 |
+
### 2. **Advanced Communication Analysis**
|
| 192 |
+
- Cell-cell communication inference
|
| 193 |
+
- Communication hub identification
|
| 194 |
+
- Pathway enrichment analysis
|
| 195 |
+
- Temporal communication patterns
|
| 196 |
+
|
| 197 |
+
### 3. **Multi-Chamber Atlas**
|
| 198 |
+
- Chamber-specific marker identification
|
| 199 |
+
- Cross-chamber correlation analysis
|
| 200 |
+
- Chamber-specific therapeutic targets
|
| 201 |
+
|
| 202 |
+
### 4. **Comprehensive Pipeline**
|
| 203 |
+
- Integration of all analysis components
|
| 204 |
+
- Comprehensive reporting and visualization
|
| 205 |
+
- Model persistence and reusability
|
| 206 |
+
|
| 207 |
+
## 📊 Key Features
|
| 208 |
+
|
| 209 |
+
### 🔧 **Technical Features**
|
| 210 |
+
- **Modular Architecture**: Independent, reusable components
|
| 211 |
+
- **Configuration Management**: YAML/JSON configuration with validation
|
| 212 |
+
- **Memory Optimization**: Automatic dataset scaling for available resources
|
| 213 |
+
- **Model Persistence**: Save and load trained models
|
| 214 |
+
- **Comprehensive Testing**: Full test suite with CI/CD
|
| 215 |
+
- **API Integration**: REST API and CLI interfaces
|
| 216 |
+
- **Docker Support**: Containerized deployment
|
| 217 |
+
|
| 218 |
+
### 🧬 **Scientific Features**
|
| 219 |
+
- **Chamber-Specific Analysis**: LA, RA, LV, RV specific patterns
|
| 220 |
+
- **Communication Networks**: Cell-cell interaction mapping
|
| 221 |
+
- **Hub Cell Identification**: Key regulatory cell types
|
| 222 |
+
- **Pathway Analysis**: Cardiac development, angiogenesis, ECM remodeling
|
| 223 |
+
- **Marker Discovery**: Chamber and cell-type specific biomarkers
|
| 224 |
+
- **Cross-Chamber Correlations**: Inter-chamber relationship analysis
|
| 225 |
+
|
| 226 |
+
## 📈 Key Results
|
| 227 |
+
|
| 228 |
+
### Chamber Distribution
|
| 229 |
+
- **RA (Right Atrium):** 28.4% of cells
|
| 230 |
+
- **LV (Left Ventricle):** 27.0% of cells
|
| 231 |
+
- **LA (Left Atrium):** 26.4% of cells
|
| 232 |
+
- **RV (Right Ventricle):** 18.2% of cells
|
| 233 |
+
|
| 234 |
+
### Chamber-Specific Markers
|
| 235 |
+
- **RA:** NPPA, MIR100HG, MYL7, MYL4, PDE4D
|
| 236 |
+
- **RV:** NEAT1, MYH7, FHL2, C15orf41, PCDH7
|
| 237 |
+
- **LA:** NPPA, ELN, MYL7, EBF2, RORA
|
| 238 |
+
- **LV:** CD36, LINC00486, FHL2, RP11-532N4.2, MYH7
|
| 239 |
+
|
| 240 |
+
### Cross-Chamber Correlations
|
| 241 |
+
- **RV vs LV:** r = 0.985 (highest correlation)
|
| 242 |
+
- **RA vs LA:** r = 0.960
|
| 243 |
+
- **LA vs LV:** r = 0.870 (lowest correlation)
|
| 244 |
+
|
| 245 |
+
## 🚢 Deployment Options
|
| 246 |
+
|
| 247 |
+
### Hugging Face Spaces
|
| 248 |
+
```bash
|
| 249 |
+
# Deploy to Hugging Face Spaces
|
| 250 |
+
git clone https://github.com/Tumo505/HeartMap.git
|
| 251 |
+
cd HeartMap
|
| 252 |
+
# Upload app.py and requirements to your Hugging Face Space
|
| 253 |
+
```
|
| 254 |
+
|
| 255 |
+
### Docker Deployment
|
| 256 |
+
```bash
|
| 257 |
+
# Build and run with Docker
|
| 258 |
+
docker build -t heartmap .
|
| 259 |
+
docker run -p 8000:8000 -v $(pwd)/data:/app/data heartmap
|
| 260 |
+
|
| 261 |
+
# Or use docker-compose
|
| 262 |
+
docker-compose up
|
| 263 |
+
```
|
| 264 |
+
|
| 265 |
+
### Local Development
|
| 266 |
+
```bash
|
| 267 |
+
# Clone and setup
|
| 268 |
+
git clone https://github.com/Tumo505/HeartMap.git
|
| 269 |
+
cd HeartMap
|
| 270 |
+
pip install -e .[dev]
|
| 271 |
+
|
| 272 |
+
# Run tests
|
| 273 |
+
python tests/test_heartmap.py
|
| 274 |
+
|
| 275 |
+
# Start development server
|
| 276 |
+
python scripts/run_api_server.py
|
| 277 |
+
```
|
| 278 |
+
|
| 279 |
+
## 📚 Documentation
|
| 280 |
+
|
| 281 |
+
- **API Documentation**: Available at `/docs` when running the API server
|
| 282 |
+
- **Configuration Guide**: See `config.yaml` for all options
|
| 283 |
+
- **Example Notebooks**: Check `notebooks/` directory
|
| 284 |
+
- **Migration Guide**: Run `python scripts/migrate.py` to convert legacy analysis
|
| 285 |
+
|
| 286 |
+
## 🔄 Migration from Legacy Version
|
| 287 |
+
|
| 288 |
+
If you have existing HeartMAP analysis results:
|
| 289 |
+
|
| 290 |
+
```bash
|
| 291 |
+
# Run migration script
|
| 292 |
+
python scripts/migrate.py
|
| 293 |
+
|
| 294 |
+
# This will:
|
| 295 |
+
# - Copy legacy scripts to scripts/legacy/
|
| 296 |
+
# - Create migration notebooks
|
| 297 |
+
# - Generate example scripts with new API
|
| 298 |
+
# - Set up deployment files
|
| 299 |
+
```
|
| 300 |
+
|
| 301 |
+
## 🧪 Configuration
|
| 302 |
+
|
| 303 |
+
Create a `config.yaml` file to customize analysis:
|
| 304 |
+
|
| 305 |
+
```yaml
|
| 306 |
+
data:
|
| 307 |
+
min_genes: 200
|
| 308 |
+
min_cells: 3
|
| 309 |
+
max_cells_subset: 50000 # Optimize for your memory
|
| 310 |
+
max_genes_subset: 5000 # Reduce for faster analysis
|
| 311 |
+
random_seed: 42
|
| 312 |
+
|
| 313 |
+
analysis:
|
| 314 |
+
resolution: 0.5
|
| 315 |
+
n_marker_genes: 25
|
| 316 |
+
use_leiden: true
|
| 317 |
+
|
| 318 |
+
model:
|
| 319 |
+
save_intermediate: true
|
| 320 |
+
use_gpu: false
|
| 321 |
+
```
|
| 322 |
+
|
| 323 |
+
## 🎯 Clinical Applications
|
| 324 |
+
|
| 325 |
+
- **Personalized Medicine**: Chamber-specific treatment strategies
|
| 326 |
+
- **Drug Development**: Chamber-specific therapeutic targets
|
| 327 |
+
- **Disease Understanding**: Chamber-specific disease mechanisms
|
| 328 |
+
- **Biomarker Discovery**: Chamber and communication-specific markers
|
| 329 |
+
|
| 330 |
+
## 🔒 Data Integrity & Reproducibility
|
| 331 |
+
|
| 332 |
+
- **SHA-256 Checksums**: Automatic data integrity verification
|
| 333 |
+
- **Fixed Random Seeds**: Reproducible results across runs
|
| 334 |
+
- **Version Control**: Model and configuration versioning
|
| 335 |
+
- **Comprehensive Logging**: Full analysis provenance tracking
|
| 336 |
+
|
| 337 |
+
## 🤝 Contributing
|
| 338 |
+
|
| 339 |
+
1. Fork the repository
|
| 340 |
+
2. Create a feature branch: `git checkout -b feature-name`
|
| 341 |
+
3. Install development dependencies: `pip install -e .[dev]`
|
| 342 |
+
4. Run tests: `python tests/test_heartmap.py`
|
| 343 |
+
5. Submit a pull request
|
| 344 |
+
|
| 345 |
+
## 📄 License
|
| 346 |
+
|
| 347 |
+
This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
|
| 348 |
+
|
| 349 |
+
## 📖 Citation
|
| 350 |
+
|
| 351 |
+
```bibtex
|
| 352 |
+
@software{heartmap2024,
|
| 353 |
+
title={HeartMAP: A Multi-Chamber Spatial Framework for Cardiac Cell-Cell Communication},
|
| 354 |
+
author={Kgabeng, Tumo and Pandelani, Thanyani and Wang, Lulu and Ngwangwa, Harry},
|
| 355 |
+
year={2024},
|
| 356 |
+
url={https://github.com/Tumo505/HeartMap},
|
| 357 |
+
version={1.0.0}
|
| 358 |
+
}
|
| 359 |
+
```
|
| 360 |
+
|
| 361 |
+
## 🆘 Support
|
| 362 |
+
|
| 363 |
+
- **Issues**: [GitHub Issues](https://github.com/Tumo505/HeartMap/issues)
|
| 364 |
+
- **Discussions**: [GitHub Discussions](https://github.com/Tumo505/HeartMap/discussions)
|
| 365 |
+
- **Email**: 28346416@mylife.unisa.ac.za
|
| 366 |
+
|
| 367 |
+
## 🎉 Acknowledgments
|
| 368 |
+
|
| 369 |
+
- Department of Mechanical, Bioresources and Biomedical Engineering, University of South Africa
|
| 370 |
+
- Department of Engineering, Reykjavik University
|
| 371 |
+
- Single Cell Portal (SCP498) for providing the heart dataset
|
| 372 |
+
- The open-source scientific Python community
|
| 373 |
+
|
| 374 |
+
## 🔒 Data Integrity
|
| 375 |
+
|
| 376 |
+
### SHA-256 Checksums
|
| 377 |
+
To ensure the integrity of raw data files, the HeartMAP project uses **SHA-256 checksums**. This guarantees that the raw data files have not been modified or corrupted during storage or transfer.
|
| 378 |
+
|
| 379 |
+
### Where SHA-256 is Used:
|
| 380 |
+
1. **Raw Data Verification**:
|
| 381 |
+
- Before preprocessing, the pipeline verifies the integrity of raw data files in the `data/raw/` directory using a `checksums.txt` file.
|
| 382 |
+
- Example:
|
| 383 |
+
```bash
|
| 384 |
+
python utils/sha256_checksum.py verify data/raw data/raw/checksums.txt
|
| 385 |
+
```
|
| 386 |
+
|
| 387 |
+
2. **Checksum Generation**:
|
| 388 |
+
- SHA-256 checksums are generated for all raw data files and stored in `checksums.txt`.
|
| 389 |
+
- Example:
|
| 390 |
+
```bash
|
| 391 |
+
python utils/sha256_checksum.py generate data/raw data/raw/checksums.txt
|
| 392 |
+
```
|
| 393 |
+
|
| 394 |
+
### Why SHA-256?
|
| 395 |
+
Using SHA-256 ensures:
|
| 396 |
+
- Data integrity during storage and transfer.
|
| 397 |
+
- Detection of accidental or malicious modifications to raw data files.
|
| 398 |
+
- Reproducibility by verifying that the same raw data is used across different runs.
|
| 399 |
+
|
| 400 |
+
## 🔄 Reproducibility
|
| 401 |
+
|
| 402 |
+
The HeartMAP project ensures reproducibility in all stochastic processes by using fixed random seeds. This guarantees consistent results across different runs of the pipeline. Below are the key areas where random seeds are applied:
|
| 403 |
+
|
| 404 |
+
1. **Random Sampling**:
|
| 405 |
+
- Fixed seed (`seed = 42`) is used for randomly sampling cells during data scaling and preprocessing.
|
| 406 |
+
- Example:
|
| 407 |
+
```python
|
| 408 |
+
np.random.seed(42)
|
| 409 |
+
cell_indices = np.random.choice(adata.n_obs, size=50000, replace=False)
|
| 410 |
+
```
|
| 411 |
+
|
| 412 |
+
2. **Mock Data Generation**:
|
| 413 |
+
- Fixed seed is used to generate mock communication interactions for testing and visualization.
|
| 414 |
+
- Example:
|
| 415 |
+
```python
|
| 416 |
+
np.random.seed(42)
|
| 417 |
+
mock_interactions = pd.DataFrame({
|
| 418 |
+
'source': np.random.choice(cell_types.index, n_interactions),
|
| 419 |
+
'target': np.random.choice(cell_types.index, n_interactions),
|
| 420 |
+
'score': np.random.uniform(0, 1, n_interactions)
|
| 421 |
+
})
|
| 422 |
+
```
|
| 423 |
+
|
| 424 |
+
3. **Clustering**:
|
| 425 |
+
- Fixed seed (`random_state=42`) is used for K-means clustering as a fallback when Leiden clustering is unavailable.
|
| 426 |
+
- Example:
|
| 427 |
+
```python
|
| 428 |
+
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
|
| 429 |
+
```
|
| 430 |
+
|
| 431 |
+
4. **LIANA Analysis**:
|
| 432 |
+
- Fixed seed is passed to the LIANA analysis function for reproducibility in ligand-receptor interaction analysis.
|
| 433 |
+
- Example:
|
| 434 |
+
```python
|
| 435 |
+
li.mt.rank_aggregate.by_sample(
|
| 436 |
+
adata,
|
| 437 |
+
groupby=cell_type_col,
|
| 438 |
+
resource_name='consensus',
|
| 439 |
+
n_perms=100,
|
| 440 |
+
seed=42,
|
| 441 |
+
verbose=True
|
| 442 |
+
)
|
| 443 |
+
```
|
| 444 |
+
|
| 445 |
+
### Why Fixed Seeds?
|
| 446 |
+
Using fixed random seeds ensures:
|
| 447 |
+
- Consistent results across different runs of the pipeline.
|
| 448 |
+
- Easier debugging and validation of results.
|
| 449 |
+
- Reproducibility for scientific publications and collaborative workflows.
|
| 450 |
+
|
| 451 |
+
## 🚀 Next Steps
|
| 452 |
+
|
| 453 |
+
1. Validate markers and communication patterns with literature and experiments
|
| 454 |
+
2. Integrate spatial transcriptomics and disease data
|
| 455 |
+
3. Develop chamber-specific clinical applications
|
| 456 |
+
|
src/heartmap.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LICENSE
|
| 2 |
+
NOTICE
|
| 3 |
+
README.md
|
| 4 |
+
setup.py
|
| 5 |
+
src/heartmap/__init__.py
|
| 6 |
+
src/heartmap.egg-info/PKG-INFO
|
| 7 |
+
src/heartmap.egg-info/SOURCES.txt
|
| 8 |
+
src/heartmap.egg-info/dependency_links.txt
|
| 9 |
+
src/heartmap.egg-info/entry_points.txt
|
| 10 |
+
src/heartmap.egg-info/requires.txt
|
| 11 |
+
src/heartmap.egg-info/top_level.txt
|
| 12 |
+
src/heartmap/api/__init__.py
|
| 13 |
+
src/heartmap/config/__init__.py
|
| 14 |
+
src/heartmap/data/__init__.py
|
| 15 |
+
src/heartmap/models/__init__.py
|
| 16 |
+
src/heartmap/pipelines/__init__.py
|
| 17 |
+
src/heartmap/utils/__init__.py
|
| 18 |
+
tests/test_heartmap.py
|
src/heartmap.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
src/heartmap.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
heartmap = heartmap.api:run_cli
|
| 3 |
+
heartmap-api = heartmap.api:create_api
|
src/heartmap.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scanpy>=1.9.0
|
| 2 |
+
pandas>=1.5.0
|
| 3 |
+
numpy>=1.21.0
|
| 4 |
+
scipy>=1.9.0
|
| 5 |
+
scikit-learn>=1.1.0
|
| 6 |
+
matplotlib>=3.5.0
|
| 7 |
+
seaborn>=0.11.0
|
| 8 |
+
anndata>=0.8.0
|
| 9 |
+
plotly>=5.0.0
|
| 10 |
+
networkx>=2.8.0
|
| 11 |
+
tqdm
|
| 12 |
+
statsmodels
|
| 13 |
+
|
| 14 |
+
[all]
|
| 15 |
+
liana>=0.1.0
|
| 16 |
+
cellphonedb>=3.0.0
|
| 17 |
+
omnipath>=1.0.0
|
| 18 |
+
fastapi>=0.100.0
|
| 19 |
+
uvicorn>=0.23.0
|
| 20 |
+
pydantic>=2.0.0
|
| 21 |
+
pytest>=7.0.0
|
| 22 |
+
pytest-cov>=4.0.0
|
| 23 |
+
black>=23.0.0
|
| 24 |
+
flake8>=6.0.0
|
| 25 |
+
mypy>=1.0.0
|
| 26 |
+
jupyter>=1.0.0
|
| 27 |
+
notebook>=6.0.0
|
| 28 |
+
|
| 29 |
+
[api]
|
| 30 |
+
fastapi>=0.100.0
|
| 31 |
+
uvicorn>=0.23.0
|
| 32 |
+
pydantic>=2.0.0
|
| 33 |
+
|
| 34 |
+
[communication]
|
| 35 |
+
liana>=0.1.0
|
| 36 |
+
cellphonedb>=3.0.0
|
| 37 |
+
omnipath>=1.0.0
|
| 38 |
+
|
| 39 |
+
[dev]
|
| 40 |
+
pytest>=7.0.0
|
| 41 |
+
pytest-cov>=4.0.0
|
| 42 |
+
black>=23.0.0
|
| 43 |
+
flake8>=6.0.0
|
| 44 |
+
mypy>=1.0.0
|
| 45 |
+
jupyter>=1.0.0
|
| 46 |
+
notebook>=6.0.0
|
src/heartmap.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
heartmap
|
src/heartmap/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HeartMAP: Heart Multi-chamber Analysis Platform
|
| 3 |
+
|
| 4 |
+
A comprehensive framework for analyzing cell-cell communication across all four chambers
|
| 5 |
+
of the human heart using single-cell RNA sequencing data.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
__version__ = "1.0.0"
|
| 9 |
+
__author__ = "Tumo Kgabeng, Thanyani Pandelani, Lulu Wang, Harry Ngwangwa"
|
| 10 |
+
__email__ = "28346416@mylife.unisa.ac.za"
|
| 11 |
+
|
| 12 |
+
from .models import HeartMapModel
|
| 13 |
+
from .pipelines import BasicPipeline, AdvancedCommunicationPipeline, MultiChamberPipeline
|
| 14 |
+
from .config import Config
|
| 15 |
+
|
| 16 |
+
__all__ = [
|
| 17 |
+
"HeartMapModel",
|
| 18 |
+
"BasicPipeline",
|
| 19 |
+
"AdvancedCommunicationPipeline",
|
| 20 |
+
"MultiChamberPipeline",
|
| 21 |
+
"Config"
|
| 22 |
+
]
|
src/heartmap/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (765 Bytes). View file
|
|
|
src/heartmap/api/__init__.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
API interface for HeartMAP
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from typing import Dict, Any, Optional, Union, List
|
| 6 |
+
import warnings
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
import tempfile
|
| 9 |
+
import json
|
| 10 |
+
|
| 11 |
+
from ..config import Config, load_config
|
| 12 |
+
from ..models import HeartMapModel
|
| 13 |
+
|
| 14 |
+
# Optional dependency guard: the REST API needs FastAPI, pydantic and uvicorn,
# but the CLI in this module must stay importable without them.
try:
    from fastapi import FastAPI, UploadFile, File, HTTPException
    from fastapi.responses import FileResponse, JSONResponse
    from pydantic import BaseModel
    import uvicorn
    FASTAPI_AVAILABLE = True
except ImportError:
    FASTAPI_AVAILABLE = False
    # The request/response models below subclass BaseModel at import time.
    # Without a placeholder the class statements raise NameError and the whole
    # module (CLI included) becomes unimportable when FastAPI is missing.
    BaseModel = object
    warnings.warn("FastAPI not available. Install with: pip install fastapi uvicorn")
|
| 23 |
+
|
| 24 |
+
from ..models import HeartMapModel
|
| 25 |
+
from ..pipelines import ComprehensivePipeline, BasicPipeline, AdvancedCommunicationPipeline, MultiChamberPipeline
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class AnalysisRequest(BaseModel):
    """Request model for analysis.

    Options that accompany the uploaded data file on ``POST /analyze``.
    Every field has a default, so ``AnalysisRequest()`` is a valid request.
    """
    # Pipeline selector looked up by HeartMapAPI._get_pipeline; accepted keys
    # include: basic, advanced_communication, multi_chamber, comprehensive.
    analysis_type: str = "comprehensive"
    # Accepted by the /analyze handler but not applied yet (the handler only
    # checks truthiness and does nothing with the value).
    config_overrides: Optional[Dict[str, Any]] = None
    # Only "json" is handled by HeartMapAPI._format_results; any other value
    # makes it raise ValueError.
    output_format: str = "json"
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class AnalysisResponse(BaseModel):
    """Response model for analysis.

    Returned by ``POST /analyze``. ``status`` and ``message`` are required;
    the remaining fields default to ``None``.
    """
    # The /analyze handler sets this to "success" on completion.
    status: str
    # Human-readable outcome description.
    message: str
    # Summary dictionary produced by HeartMapAPI._format_results.
    results: Optional[Dict[str, Any]] = None
    # Not populated by the /analyze handler in this module.
    output_files: Optional[List[str]] = None
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class HeartMapAPI:
    """REST API for HeartMAP analysis.

    Wraps a FastAPI application exposing ``/``, ``/health``, ``/analyze``,
    ``/models`` and ``/config``. When FastAPI is not installed the object can
    still be constructed (``self.app`` is ``None``), but :meth:`run` raises
    ``ImportError``.
    """

    def __init__(self, config: Union[str, Config, None] = None):
        """Create the API and register its routes.

        Args:
            config: A ready ``Config`` instance, or any value accepted by
                ``load_config`` (e.g. a configuration file path or ``None``).
        """
        if isinstance(config, Config):
            self.config = config
        else:
            self.config = load_config(config)
        self.app = FastAPI(
            title="HeartMAP API",
            description="Heart Multi-chamber Analysis Platform API",
            version="1.0.0"
        ) if FASTAPI_AVAILABLE else None

        if FASTAPI_AVAILABLE:
            self._setup_routes()

    def _setup_routes(self):
        """Setup API routes"""

        @self.app.get("/")
        async def root():
            return {"message": "HeartMAP API", "version": "1.0.0"}

        @self.app.get("/health")
        async def health_check():
            return {"status": "healthy"}

        # NOTE(review): `request` is a pydantic body model declared next to a
        # multipart file upload. Form fields such as `-F analysis_type=...`
        # (as shown in the README) are probably not mapped onto it - confirm
        # and consider fastapi.Form parameters instead.
        @self.app.post("/analyze", response_model=AnalysisResponse)
        async def analyze_data(
            file: UploadFile = File(...),
            request: AnalysisRequest = AnalysisRequest()
        ):
            """Analyze single-cell data"""
            # Bound before the try block so the finally clause never touches
            # an unbound name when creating the temporary file itself fails.
            tmp_file_path = None
            try:
                # Save uploaded file temporarily
                with tempfile.NamedTemporaryFile(delete=False, suffix=".h5ad") as tmp_file:
                    # Record the path first so a failed write is still cleaned up
                    tmp_file_path = tmp_file.name
                    content = await file.read()
                    tmp_file.write(content)

                # Update config with overrides
                if request.config_overrides:
                    # TODO: overrides are accepted but not applied yet
                    pass

                # Pipeline output lives in a throw-away directory; only the
                # formatted summary is returned to the client.
                with tempfile.TemporaryDirectory() as output_dir:
                    pipeline = self._get_pipeline(request.analysis_type)
                    results = pipeline.run(tmp_file_path, output_dir)

                    response_data = self._format_results(results, request.output_format)

                    return AnalysisResponse(
                        status="success",
                        message="Analysis completed successfully",
                        results=response_data
                    )

            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))
            finally:
                # Clean up temporary file
                if tmp_file_path is not None:
                    Path(tmp_file_path).unlink(missing_ok=True)

        @self.app.get("/models")
        async def list_models():
            """List available analysis models"""
            return {
                "models": [
                    "basic",
                    "advanced_communication",
                    "multi_chamber",
                    "comprehensive"
                ]
            }

        @self.app.get("/config")
        async def get_config():
            """Get current configuration"""
            return self.config.to_dict()

        @self.app.post("/config")
        async def update_config(new_config: Dict[str, Any]):
            """Update configuration"""
            try:
                self.config = Config.from_dict(new_config)
                return {"status": "success", "message": "Configuration updated"}
            except Exception as e:
                raise HTTPException(status_code=400, detail=str(e))

    def _get_pipeline(self, analysis_type: str):
        """Get analysis pipeline by type.

        Only the requested pipeline is instantiated.

        Args:
            analysis_type: One of ``basic``, ``advanced_communication``
                (alias ``advanced``, matching the CLI), ``multi_chamber`` or
                ``comprehensive``.

        Returns:
            A pipeline instance built with the current configuration.

        Raises:
            ValueError: If ``analysis_type`` is not a known key.
        """
        pipeline_classes = {
            "basic": BasicPipeline,
            "advanced": AdvancedCommunicationPipeline,
            "advanced_communication": AdvancedCommunicationPipeline,
            "multi_chamber": MultiChamberPipeline,
            "comprehensive": ComprehensivePipeline,
        }

        if analysis_type not in pipeline_classes:
            raise ValueError(f"Unknown analysis type: {analysis_type}")

        return pipeline_classes[analysis_type](self.config)

    def _format_results(self, results: Dict[str, Any], output_format: str) -> Dict[str, Any]:
        """Format results for API response.

        Args:
            results: Pipeline output. The ``'results'`` entry (if present) is
                scanned for per-analysis ``'metadata'`` entries, and the
                ``'adata'`` entry (if present) supplies the cell count.
            output_format: Only ``"json"`` is supported.

        Returns:
            A dictionary with a ``summary`` plus one ``<section>_summary``
            entry per analysis section that carries metadata; empty when
            ``results`` has no ``'results'`` key.

        Raises:
            ValueError: If ``output_format`` is not ``"json"``.
        """
        if output_format != "json":
            raise ValueError(f"Unsupported output format: {output_format}")

        formatted: Dict[str, Any] = {}
        if 'results' not in results:
            return formatted

        res = results['results']
        # A missing or None 'adata' entry is reported as zero cells
        adata = results.get('adata')
        formatted['summary'] = {
            'n_cells': adata.n_obs if adata is not None else 0,
            'analysis_completed': True
        }

        # Extract key metrics
        for section in ('annotation', 'communication', 'multi_chamber'):
            if section in res and 'metadata' in res[section]:
                formatted[f'{section}_summary'] = res[section]['metadata']

        return formatted

    def run(self, host: str = "0.0.0.0", port: int = 8000, debug: bool = False):
        """Run the API server.

        Args:
            host: Interface to bind.
            port: TCP port to listen on.
            debug: When true, run uvicorn with debug-level logging.

        Raises:
            ImportError: If FastAPI/uvicorn are not installed.
        """
        if not FASTAPI_AVAILABLE:
            raise ImportError("FastAPI not available. Install with: pip install fastapi uvicorn")

        # uvicorn.run() has no `debug` keyword in current releases (this
        # package requires uvicorn>=0.23), so forwarding it raises TypeError.
        # Map the flag onto the log level instead; None keeps uvicorn's default.
        uvicorn.run(self.app, host=host, port=port, log_level="debug" if debug else None)
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
class CLIInterface:
    """Command line interface for HeartMAP"""

    def __init__(self):
        # Populated by run_analysis() with the loaded configuration
        self.config = None

    def run_analysis(self,
                     data_path: str,
                     analysis_type: str = "comprehensive",
                     output_dir: str = "results",
                     config_path: Optional[str] = None):
        """Run analysis from command line.

        Args:
            data_path: Path to the input data file handed to the pipeline.
            analysis_type: One of ``basic``, ``advanced`` (alias
                ``advanced_communication``, matching the REST API),
                ``multi_chamber`` or ``comprehensive``.
            output_dir: Directory the pipeline writes its results to.
            config_path: Optional configuration file passed to ``load_config``.

        Returns:
            Whatever the selected pipeline's ``run`` method returns.

        Raises:
            ValueError: If ``analysis_type`` is unknown. This is checked
                before any configuration is loaded or directory created.
        """
        # Map names to classes so only the selected pipeline is constructed
        pipeline_classes = {
            "basic": BasicPipeline,
            "advanced": AdvancedCommunicationPipeline,
            "advanced_communication": AdvancedCommunicationPipeline,
            "multi_chamber": MultiChamberPipeline,
            "comprehensive": ComprehensivePipeline,
        }

        # Fail fast: validate before touching the filesystem
        if analysis_type not in pipeline_classes:
            raise ValueError(f"Unknown analysis type: {analysis_type}")

        # Load configuration
        self.config = load_config(config_path)

        # Update output directory
        self.config.update_paths(Path(output_dir).parent)
        self.config.create_directories()

        pipeline = pipeline_classes[analysis_type](self.config)

        # Run analysis
        print(f"Starting {analysis_type} analysis...")
        results = pipeline.run(data_path, output_dir)

        print(f"Analysis completed! Results saved to: {output_dir}")
        return results
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def create_api(config_path: Optional[str] = None) -> HeartMapAPI:
    """Build a HeartMapAPI from an optional configuration file path.

    The returned object is only constructed, not started; call its ``run()``
    method to serve requests.
    """
    api = HeartMapAPI(config=config_path)
    return api
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def run_cli():
    """Parse command-line arguments and run the requested analysis.

    Reads the data path plus the optional ``--analysis-type``,
    ``--output-dir`` and ``--config`` options from ``sys.argv`` and hands
    them to ``CLIInterface.run_analysis``.
    """
    import argparse

    arg_parser = argparse.ArgumentParser(description="HeartMAP Analysis Platform")
    arg_parser.add_argument("data_path", help="Path to input data file")
    arg_parser.add_argument(
        "--analysis-type",
        default="comprehensive",
        choices=["basic", "advanced", "multi_chamber", "comprehensive"],
        help="Type of analysis to run",
    )
    arg_parser.add_argument(
        "--output-dir",
        default="results",
        help="Output directory for results",
    )
    arg_parser.add_argument("--config", help="Path to configuration file")

    parsed = arg_parser.parse_args()

    interface = CLIInterface()
    interface.run_analysis(
        data_path=parsed.data_path,
        analysis_type=parsed.analysis_type,
        output_dir=parsed.output_dir,
        config_path=parsed.config,
    )
|
src/heartmap/api/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (7.96 kB). View file
|
|
|
src/heartmap/config/__init__.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration management for HeartMAP
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Dict, Any, Optional, List
|
| 8 |
+
import yaml
|
| 9 |
+
import json
|
| 10 |
+
from dataclasses import dataclass, asdict
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@dataclass
class DataConfig:
    """Data processing configuration.

    Every field has a default, so ``DataConfig()`` is valid. ``Config.from_dict``
    builds this class from the ``'data'`` mapping via keyword expansion, so the
    keys of that mapping must match these field names exactly.
    """
    # presumably QC thresholds (genes per cell / cells per gene) — TODO confirm
    min_genes: int = 200
    min_cells: int = 3
    # Optional caps; None is the default. The README example sets 50000 / 5000
    # to bound memory use and run time.
    max_cells_subset: Optional[int] = None
    max_genes_subset: Optional[int] = None
    # presumably the normalization target and the number of highly variable
    # genes kept — TODO confirm against the data module
    target_sum: float = 1e4
    n_top_genes: int = 2000
    # Seed value; the README documents 42 as the fixed seed for reproducibility
    random_seed: int = 42
    test_mode: bool = False
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class AnalysisConfig:
    """Analysis configuration.

    Every field has a default. ``Config.from_dict`` builds this class from the
    ``'analysis'`` mapping, so its keys must match these field names.
    """
    # presumably PCA / neighbor-graph parameters — TODO confirm against the
    # pipelines that read them
    n_components_pca: int = 50
    n_neighbors: int = 10
    n_pcs: int = 40
    # presumably the clustering resolution — TODO confirm
    resolution: float = 0.5
    n_marker_genes: int = 25
    # Feature toggles; the README describes K-means as the fallback when
    # Leiden clustering is unavailable.
    use_leiden: bool = True
    use_liana: bool = True
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@dataclass
class ModelConfig:
    """Model configuration.

    Every field has a default. ``Config.from_dict`` builds this class from the
    ``'model'`` mapping, so its keys must match these field names.
    """
    model_type: str = "comprehensive"
    save_intermediate: bool = True
    use_gpu: bool = False
    # Optional limits; None is the default (how None is interpreted is not
    # visible in this module — TODO confirm with the consumers).
    batch_size: Optional[int] = None
    max_memory_gb: Optional[float] = None
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@dataclass
class PathConfig:
    """Path configuration.

    All defaults are relative path strings. ``Config.from_dict`` builds this
    class from the ``'paths'`` mapping, so its keys must match these field
    names.
    """
    data_dir: str = "data"
    raw_data_dir: str = "data/raw"
    processed_data_dir: str = "data/processed"
    results_dir: str = "results"
    figures_dir: str = "figures"
    models_dir: str = "models"
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
@dataclass
class Config:
    """Main configuration class.

    Bundles the four section configs (``data``, ``analysis``, ``model``,
    ``paths``) and provides constructors/serialisers for dictionaries,
    YAML files and JSON files.
    """
    data: DataConfig
    analysis: AnalysisConfig
    model: ModelConfig
    paths: PathConfig

    @classmethod
    def from_dict(cls, config_dict: Optional[Dict[str, Any]]) -> 'Config':
        """Create a config from a (possibly partial) dictionary.

        Args:
            config_dict: Mapping with optional ``data``, ``analysis``,
                ``model`` and ``paths`` sub-mappings. ``None`` or an empty
                mapping yields the default configuration, and a section
                that is missing or ``None`` falls back to that section's
                defaults.

        Returns:
            The populated ``Config``.

        Raises:
            TypeError: If a section contains a key that is not a field of
                the corresponding section dataclass.
        """
        # An empty YAML document parses to None, and a bare "data:" line
        # parses to {"data": None}; both must mean "use the defaults".
        sections = config_dict or {}
        return cls(
            data=DataConfig(**(sections.get('data') or {})),
            analysis=AnalysisConfig(**(sections.get('analysis') or {})),
            model=ModelConfig(**(sections.get('model') or {})),
            paths=PathConfig(**(sections.get('paths') or {}))
        )

    @classmethod
    def from_yaml(cls, yaml_path: str) -> 'Config':
        """Load a config from a YAML file (an empty file gives the defaults)."""
        with open(yaml_path, 'r', encoding='utf-8') as f:
            config_dict = yaml.safe_load(f)
        return cls.from_dict(config_dict)

    @classmethod
    def from_json(cls, json_path: str) -> 'Config':
        """Load a config from a JSON file (a top-level ``null`` gives the defaults)."""
        with open(json_path, 'r', encoding='utf-8') as f:
            config_dict = json.load(f)
        return cls.from_dict(config_dict)

    @classmethod
    def default(cls) -> 'Config':
        """Create a configuration in which every section uses its defaults."""
        return cls(
            data=DataConfig(),
            analysis=AnalysisConfig(),
            model=ModelConfig(),
            paths=PathConfig()
        )

    @classmethod
    def from_file(cls, file_path: str) -> 'Config':
        """Load a config from a file, choosing YAML or JSON by extension.

        Delegates to the module-level ``load_config``, which raises
        ``ValueError`` for any other extension.
        """
        return load_config(file_path)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the config (including all sections) to nested dictionaries."""
        return asdict(self)

    def save_yaml(self, yaml_path: str) -> None:
        """Write the config to ``yaml_path`` as block-style YAML."""
        with open(yaml_path, 'w', encoding='utf-8') as f:
            yaml.dump(self.to_dict(), f, default_flow_style=False)

    def save_json(self, json_path: str) -> None:
        """Write the config to ``json_path`` as JSON indented by two spaces."""
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(self.to_dict(), f, indent=2)

    def update_paths(self, base_dir: str) -> None:
        """Re-root every configured directory under ``base_dir``.

        The standard layout (``data``, ``data/raw``, ``data/processed``,
        ``results``, ``figures``, ``models``) is re-created below
        ``base_dir``; any customised path values are overwritten.
        """
        base_path = Path(base_dir)
        data_root = base_path / "data"
        self.paths.data_dir = str(data_root)
        self.paths.raw_data_dir = str(data_root / "raw")
        self.paths.processed_data_dir = str(data_root / "processed")
        self.paths.results_dir = str(base_path / "results")
        self.paths.figures_dir = str(base_path / "figures")
        self.paths.models_dir = str(base_path / "models")

    def create_directories(self) -> None:
        """Create every configured directory (parents included) if missing."""
        dirs_to_create = (
            self.paths.data_dir,
            self.paths.raw_data_dir,
            self.paths.processed_data_dir,
            self.paths.results_dir,
            self.paths.figures_dir,
            self.paths.models_dir,
        )

        for dir_path in dirs_to_create:
            Path(dir_path).mkdir(parents=True, exist_ok=True)
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def load_config(config_path: Optional[str] = None) -> Config:
    """Return the configuration stored at ``config_path``.

    With no path the default configuration is returned. Otherwise the file
    extension (compared case-insensitively) selects the parser: ``.yml`` /
    ``.yaml`` for YAML and ``.json`` for JSON.

    Raises:
        ValueError: If the extension is none of the supported ones.
    """
    if config_path is None:
        return Config.default()

    resolved = Path(config_path)
    extension = resolved.suffix.lower()

    if extension in ('.yml', '.yaml'):
        return Config.from_yaml(str(resolved))
    if extension == '.json':
        return Config.from_json(str(resolved))
    raise ValueError(f"Unsupported config file format: {resolved.suffix}")
|
src/heartmap/config/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (5.65 kB). View file
|
|
|
src/heartmap/data/__init__.py
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data processing utilities for HeartMAP
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import hashlib
|
| 7 |
+
import subprocess
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Optional, Union, Tuple, Dict, Any, List
|
| 10 |
+
import warnings
|
| 11 |
+
|
| 12 |
+
import scanpy as sc
|
| 13 |
+
import pandas as pd
|
| 14 |
+
import numpy as np
|
| 15 |
+
import anndata as ad
|
| 16 |
+
from scipy.sparse import issparse
|
| 17 |
+
|
| 18 |
+
from ..config import Config
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class DataValidator:
    """Validate data integrity and format."""

    @staticmethod
    def verify_checksum(file_path: str, expected_checksum: str) -> bool:
        """Check a file against an expected SHA-256 digest.

        Args:
            file_path: Path of the file to hash.
            expected_checksum: Hexadecimal SHA-256 digest. Letter case and
                surrounding whitespace are ignored, so digests copied from
                tools that print upper-case hex still match.

        Returns:
            True when the file's digest equals ``expected_checksum``.
        """
        sha256_hash = hashlib.sha256()
        with open(file_path, "rb") as f:
            # 1 MiB chunks: memory stays bounded without the per-call
            # overhead of very small reads on large data files.
            for byte_block in iter(lambda: f.read(1 << 20), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest() == expected_checksum.strip().lower()

    @staticmethod
    def validate_anndata(adata: "ad.AnnData") -> Tuple[bool, List[str]]:
        """Validate the structure of an AnnData object.

        Checks that the dataset has cells and genes, that ``adata.obs``
        contains the ``n_genes`` and ``n_counts`` columns, and that the
        expression matrix is present and holds only finite values.

        Returns:
            ``(is_valid, issues)`` where ``issues`` lists one message per
            failed check and ``is_valid`` is True when the list is empty.
        """
        issues = []

        if adata.n_obs == 0:
            issues.append("No cells in dataset")
        if adata.n_vars == 0:
            issues.append("No genes in dataset")

        # Check for required obs columns
        for col in ('n_genes', 'n_counts'):
            if col not in adata.obs.columns:
                issues.append(f"Missing required obs column: {col}")

        # Check for NaN/inf values. A missing matrix is reported as an
        # issue instead of crashing inside np.isfinite.
        matrix = adata.X
        if matrix is None:
            issues.append("X matrix is missing")
        else:
            # For sparse input only the explicitly stored entries need checking.
            values = matrix.data if issparse(matrix) else matrix
            if not np.isfinite(values).all():
                issues.append("Non-finite values in X matrix")

        return len(issues) == 0, issues
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class DataLoader:
    """Load and preprocess data.

    None of the preprocessing methods modifies the AnnData object it is
    given: each one works on a copy (or returns the input untouched when
    there is nothing to do).
    """

    def __init__(self, config: "Config"):
        self.config = config

    def load_raw_data(self, file_path: str, verify_integrity: bool = True) -> "ad.AnnData":
        """Load raw single-cell data.

        The reader is chosen from the file extension (case-insensitive):
        ``.h5ad`` via ``sc.read_h5ad``, ``.h5`` via ``sc.read_10x_h5`` and
        ``.csv`` via ``sc.read_csv`` followed by a transpose.

        Args:
            file_path: Location of the data file.
            verify_integrity: Currently unused; structural validation
                always runs and only emits a warning.

        Returns:
            The loaded AnnData object.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the extension is not one of the supported ones.
        """
        path = Path(file_path)

        if not path.exists():
            raise FileNotFoundError(f"Data file not found: {path}")

        # Compare lower-cased so that e.g. "SAMPLE.H5AD" is accepted, in
        # line with how config file extensions are matched.
        suffix = path.suffix.lower()
        if suffix == '.h5ad':
            adata = sc.read_h5ad(path)
        elif suffix == '.h5':
            adata = sc.read_10x_h5(path, genome=None, gex_only=True)
        elif suffix == '.csv':
            adata = sc.read_csv(path).T  # Transpose to have genes as variables
        else:
            raise ValueError(f"Unsupported file format: {path.suffix}")

        # Validation is advisory: problems are reported, loading still succeeds.
        is_valid, issues = DataValidator.validate_anndata(adata)
        if not is_valid:
            warnings.warn(f"Data validation issues: {'; '.join(issues)}")

        return adata

    def preprocess_basic(self, adata: "ad.AnnData") -> "ad.AnnData":
        """Run the basic preprocessing steps on a copy of ``adata``.

        Makes gene names unique, stores the unfiltered data in ``.raw`` and
        filters cells/genes with the configured ``min_genes`` / ``min_cells``.
        """
        adata = adata.copy()

        # Make gene names unique
        adata.var_names_make_unique()

        # Keep the unfiltered data reachable through .raw
        adata.raw = adata

        # Basic filtering
        sc.pp.filter_cells(adata, min_genes=self.config.data.min_genes)
        sc.pp.filter_genes(adata, min_cells=self.config.data.min_cells)

        return adata

    def calculate_qc_metrics(self, adata: "ad.AnnData") -> "ad.AnnData":
        """Annotate a copy of ``adata`` with quality-control metrics.

        Adds boolean ``var`` columns ``mt`` (names starting with ``MT-``),
        ``ribo`` (names starting with ``RPS`` or ``RPL``) and ``hb`` (names
        matching ``^HB[^(P)]``), then lets scanpy compute the QC metrics
        for those gene sets.
        """
        adata = adata.copy()

        # Mitochondrial genes
        adata.var['mt'] = adata.var_names.str.startswith('MT-')

        # Ribosomal genes
        adata.var['ribo'] = adata.var_names.str.startswith(('RPS', 'RPL'))

        # Hemoglobin genes: "HB" followed by any character other than
        # "(", "P" or ")".
        adata.var['hb'] = adata.var_names.str.contains('^HB[^(P)]')

        # One call is enough: with qc_vars given, scanpy computes the
        # general per-cell/per-gene metrics as well as the per-gene-set
        # ones, so a separate call without qc_vars is redundant work.
        sc.pp.calculate_qc_metrics(
            adata,
            qc_vars=['mt', 'ribo', 'hb'],
            percent_top=None,
            log1p=False,
            inplace=True
        )

        return adata

    def scale_for_memory(self, adata: "ad.AnnData") -> "ad.AnnData":
        """Shrink the dataset to the configured cell/gene limits.

        Cells are subsampled at random (seeded with ``random_seed``) when
        there are more than ``max_cells_subset``; genes are reduced to the
        ``max_genes_subset`` with the highest variance. When neither limit
        applies the input object itself is returned.

        NOTE: seeding goes through ``np.random.seed`` and therefore resets
        NumPy's global random state. For sparse matrices the gene variance
        is estimated from the first (at most) 1000 cells only.
        """
        data_cfg = self.config.data

        if data_cfg.max_cells_subset and adata.n_obs > data_cfg.max_cells_subset:
            np.random.seed(data_cfg.random_seed)
            cell_indices = np.random.choice(
                adata.n_obs,
                size=data_cfg.max_cells_subset,
                replace=False
            )
            adata = adata[cell_indices].copy()

        if data_cfg.max_genes_subset and adata.n_vars > data_cfg.max_genes_subset:
            # Select most variable genes
            if issparse(adata.X):
                # Densify only a bounded slice of cells to limit memory use.
                dense_subset = adata.X[:min(1000, adata.n_obs), :].toarray()
                gene_vars = np.var(dense_subset, axis=0)
            else:
                gene_vars = np.var(adata.X, axis=0)

            # argsort is ascending, so the tail holds the most variable genes.
            top_gene_indices = np.argsort(gene_vars)[-data_cfg.max_genes_subset:]
            adata = adata[:, top_gene_indices].copy()

        return adata

    def normalize_and_scale(self, adata: "ad.AnnData") -> "ad.AnnData":
        """Normalise and log-transform a copy of ``adata``.

        NaN and infinite entries are replaced by 0, each cell is normalised
        to ``target_sum`` total counts and ``log1p`` is applied.
        """
        adata = adata.copy()

        # Clean data - replace NaN / infinite values by 0
        if issparse(adata.X):
            # Only stored entries can be non-finite; implicit zeros are fine.
            adata.X.data = np.nan_to_num(adata.X.data, nan=0, posinf=0, neginf=0)
        else:
            adata.X = np.nan_to_num(adata.X, nan=0, posinf=0, neginf=0)

        # Normalize to target sum
        sc.pp.normalize_total(adata, target_sum=self.config.data.target_sum)

        # Log transform
        sc.pp.log1p(adata)

        return adata

    def preprocess(self, adata: "ad.AnnData") -> "ad.AnnData":
        """Complete preprocessing pipeline (convenience method).

        Chains ``preprocess_basic``, ``scale_for_memory`` and
        ``normalize_and_scale``.
        """
        adata = self.preprocess_basic(adata)
        adata = self.scale_for_memory(adata)
        adata = self.normalize_and_scale(adata)
        return adata
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
class DataProcessor:
    """Main data processing class.

    Drives a ``DataLoader`` through the full raw-to-normalised pipeline and
    optionally writes a checkpoint after each stage.
    """

    def __init__(self, config: "Config"):
        self.config = config
        self.loader = DataLoader(config)

    def process_from_raw(self,
                         file_path: str,
                         save_intermediate: bool = True) -> "ad.AnnData":
        """Complete processing pipeline from raw data.

        Stages: load, basic preprocessing, QC metrics, optional subsetting
        (only when ``max_cells_subset`` or ``max_genes_subset`` is set) and
        normalisation.

        Args:
            file_path: Raw data file understood by ``DataLoader.load_raw_data``.
            save_intermediate: When true, each stage's result is written to
                ``config.paths.processed_data_dir`` as ``preprocessed.h5ad``,
                ``qc_calculated.h5ad``, ``scaled.h5ad`` (only if subsetting
                ran) and ``normalized.h5ad``. The directory is created when
                it does not exist yet.

        Returns:
            The normalised AnnData object.
        """
        # Load raw data
        adata = self.loader.load_raw_data(file_path)

        # Basic preprocessing
        adata = self.loader.preprocess_basic(adata)
        self._save_checkpoint(adata, "preprocessed.h5ad", save_intermediate)

        # Calculate QC metrics
        adata = self.loader.calculate_qc_metrics(adata)
        self._save_checkpoint(adata, "qc_calculated.h5ad", save_intermediate)

        # Scale for memory if needed
        if (self.config.data.max_cells_subset or
                self.config.data.max_genes_subset):
            adata = self.loader.scale_for_memory(adata)
            self._save_checkpoint(adata, "scaled.h5ad", save_intermediate)

        # Normalize and scale
        adata = self.loader.normalize_and_scale(adata)
        self._save_checkpoint(adata, "normalized.h5ad", save_intermediate)

        return adata

    def _save_checkpoint(self, adata: "ad.AnnData", filename: str, enabled: bool) -> None:
        """Write ``adata`` to ``processed_data_dir/filename`` when ``enabled``."""
        if not enabled:
            return
        out_dir = Path(self.config.paths.processed_data_dir)
        # Writing into a directory that was never created would fail, so
        # make sure it exists instead of relying on a prior setup step.
        out_dir.mkdir(parents=True, exist_ok=True)
        adata.write(os.path.join(str(out_dir), filename))

    def create_test_dataset(self, adata: "ad.AnnData", n_cells: int = 1000) -> "ad.AnnData":
        """Create a small test dataset by random cell subsampling.

        Args:
            adata: Source dataset (left unmodified).
            n_cells: Number of cells to draw; capped at ``adata.n_obs``.

        Returns:
            A copy containing the sampled cells. Sampling is without
            replacement and seeded with ``config.data.random_seed`` through
            ``np.random.seed``, which resets NumPy's global random state.
        """
        np.random.seed(self.config.data.random_seed)

        n_cells = min(n_cells, adata.n_obs)
        cell_indices = np.random.choice(adata.n_obs, size=n_cells, replace=False)

        return adata[cell_indices].copy()
|
src/heartmap/data/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (6.71 kB). View file
|
|
|
src/heartmap/models/__init__.py
ADDED
|
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Core models for HeartMAP analysis
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from abc import ABC, abstractmethod
|
| 6 |
+
from typing import Dict, Any, Optional, List, Tuple
|
| 7 |
+
import warnings
|
| 8 |
+
import pickle
|
| 9 |
+
import json
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
try:
|
| 13 |
+
import scanpy as sc
|
| 14 |
+
import pandas as pd
|
| 15 |
+
import numpy as np
|
| 16 |
+
import anndata as ad
|
| 17 |
+
from scipy.sparse import issparse
|
| 18 |
+
from scipy.stats import pearsonr
|
| 19 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 20 |
+
DEPS_AVAILABLE = True
|
| 21 |
+
except ImportError:
|
| 22 |
+
DEPS_AVAILABLE = False
|
| 23 |
+
warnings.warn("Some dependencies not available. Install requirements for full functionality.")
|
| 24 |
+
|
| 25 |
+
from ..config import Config
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class BaseModel(ABC):
    """Abstract foundation shared by every HeartMAP model.

    Subclasses implement ``fit``/``predict`` plus the ``_get_state`` /
    ``_set_state`` pair that ``save`` and ``load`` use for persistence.
    """

    def __init__(self, config: Config):
        self.config = config
        self.is_fitted = False
        self.metadata = {}

    @abstractmethod
    def fit(self, adata: 'ad.AnnData') -> 'BaseModel':
        """Fit the model to data"""

    @abstractmethod
    def predict(self, adata: 'ad.AnnData') -> Dict[str, Any]:
        """Make predictions using the fitted model"""

    def save(self, path: str) -> None:
        """Pickle the model (config, fitted flag, metadata, state) to ``path``.

        Missing parent directories of ``path`` are created first.
        """
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)

        payload = {
            'config': self.config.to_dict(),
            'is_fitted': self.is_fitted,
            'metadata': self.metadata,
            'model_state': self._get_state()
        }

        with open(target, 'wb') as handle:
            pickle.dump(payload, handle)

    @classmethod
    def load(cls, path: str) -> 'BaseModel':
        """Rebuild a model of this class from a file written by ``save``.

        NOTE: the file is read with ``pickle.load``, which can execute
        arbitrary code; only load model files from trusted sources.
        """
        with open(path, 'rb') as handle:
            payload = pickle.load(handle)

        restored = cls(Config.from_dict(payload['config']))
        restored.is_fitted = payload['is_fitted']
        restored.metadata = payload['metadata']
        restored._set_state(payload['model_state'])

        return restored

    @abstractmethod
    def _get_state(self) -> Dict[str, Any]:
        """Get model internal state for saving"""

    @abstractmethod
    def _set_state(self, state: Dict[str, Any]) -> None:
        """Set model internal state from loaded data"""
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
class CellAnnotationModel(BaseModel):
    """Model for cell type annotation.

    Clusters cells (Leiden, or K-means as a fallback) on a PCA of the
    highly variable genes and ranks marker genes per cluster.
    """

    # Settings of the K-means fallback clustering.
    _FALLBACK_N_CLUSTERS = 10
    _FALLBACK_N_PCS = 20

    def __init__(self, config: Config):
        super().__init__(config)
        self.cluster_labels = None
        self.marker_genes = None
        self.cell_type_map = None

    def fit(self, adata: 'ad.AnnData') -> 'CellAnnotationModel':
        """Fit the cell annotation model.

        Works on a copy of ``adata``: selects highly variable genes, scales,
        runs PCA, builds the neighbourhood graph, clusters the cells,
        computes a UMAP and ranks marker genes per cluster (Wilcoxon).

        Clustering uses Leiden when ``config.analysis.use_leiden`` is true
        and the Leiden call does not raise ``ImportError``; otherwise
        K-means is used. In both cases the labels are stored under the
        ``'leiden'`` key.

        Returns:
            ``self``, with ``cluster_labels``, ``marker_genes`` and
            ``metadata`` populated.

        Raises:
            ImportError: If the scientific dependencies failed to import.
        """
        if not DEPS_AVAILABLE:
            raise ImportError("Required dependencies not available")

        adata = adata.copy()

        # Find highly variable genes
        sc.pp.highly_variable_genes(
            adata,
            n_top_genes=self.config.data.n_top_genes
        )
        adata.raw = adata
        # Materialise the subset: the steps below write to the object and
        # should not operate on a view of the full dataset.
        adata = adata[:, adata.var.highly_variable].copy()

        # Scale data
        sc.pp.scale(adata, max_value=10)

        # PCA
        sc.tl.pca(adata, n_comps=self.config.analysis.n_components_pca)

        # Neighborhood graph
        sc.pp.neighbors(
            adata,
            n_neighbors=self.config.analysis.n_neighbors,
            n_pcs=self.config.analysis.n_pcs
        )

        # Clustering. cluster_key is set up front so that it is defined on
        # every path, including use_leiden=False.
        cluster_key = 'leiden'
        used_leiden = False
        if self.config.analysis.use_leiden:
            try:
                sc.tl.leiden(adata, resolution=self.config.analysis.resolution)
                used_leiden = True
            except ImportError:
                warnings.warn(
                    "Leiden clustering is unavailable; falling back to K-means."
                )
        if not used_leiden:
            self._cluster_with_kmeans(adata, cluster_key)

        # UMAP
        sc.tl.umap(adata)

        # Find marker genes
        sc.tl.rank_genes_groups(
            adata,
            cluster_key,
            method='wilcoxon',
            n_genes=self.config.analysis.n_marker_genes
        )

        self.cluster_labels = adata.obs[cluster_key].copy()
        self.marker_genes = pd.DataFrame(adata.uns['rank_genes_groups']['names'])
        self.is_fitted = True

        self.metadata = {
            'n_cells': adata.n_obs,
            'n_genes': adata.n_vars,
            'n_clusters': len(adata.obs[cluster_key].unique()),
            'cluster_key': cluster_key
        }

        return self

    def _cluster_with_kmeans(self, adata: 'ad.AnnData', cluster_key: str) -> None:
        """Store K-means labels (on the leading PCs) in ``adata.obs[cluster_key]``."""
        from sklearn.cluster import KMeans

        kmeans = KMeans(
            n_clusters=self._FALLBACK_N_CLUSTERS,
            random_state=self.config.data.random_seed
        )
        labels = kmeans.fit_predict(adata.obsm['X_pca'][:, :self._FALLBACK_N_PCS])
        adata.obs[cluster_key] = pd.Categorical(labels.astype(str))

    def predict(self, adata: 'ad.AnnData') -> Dict[str, Any]:
        """Return the annotation results computed by ``fit``.

        This is a simplified prediction: ``adata`` is not inspected and the
        stored cluster labels, marker genes and metadata are returned.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.is_fitted:
            raise ValueError("Model must be fitted before prediction")

        return {
            'cluster_labels': self.cluster_labels,
            'marker_genes': self.marker_genes,
            'metadata': self.metadata
        }

    def _get_state(self) -> Dict[str, Any]:
        """Get model state for saving (marker genes as a plain dict)."""
        return {
            'cluster_labels': self.cluster_labels,
            'marker_genes': self.marker_genes.to_dict() if self.marker_genes is not None else None,
            'cell_type_map': self.cell_type_map
        }

    def _set_state(self, state: Dict[str, Any]) -> None:
        """Set model state from data produced by ``_get_state``."""
        self.cluster_labels = state['cluster_labels']
        self.marker_genes = pd.DataFrame(state['marker_genes']) if state['marker_genes'] else None
        self.cell_type_map = state['cell_type_map']
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
class CommunicationModel(BaseModel):
    """Model for cell-cell communication analysis.

    Derives three summaries from an AnnData object whose ``obs`` holds a
    ``'leiden'`` column: pairwise communication scores between cell types,
    a per-cell hub score and per-cell-type pathway scores.
    """

    # Gene sets scored by _calculate_pathway_scores.
    _HEART_PATHWAYS = {
        'Cardiac_Development': ['GATA4', 'NKX2-5', 'MEF2C', 'TBX5', 'HAND1', 'HAND2'],
        'Angiogenesis': ['VEGFA', 'VEGFB', 'VEGFC', 'FLT1', 'KDR', 'PDGFB'],
        'ECM_Remodeling': ['COL1A1', 'COL3A1', 'MMP2', 'MMP9', 'TIMP1', 'TIMP2'],
        'Calcium_Signaling': ['CACNA1C', 'CACNA1D', 'CACNA1G', 'CACNA1H', 'CACNA1S'],
    }

    # Number of cells densified at a time when computing hub scores.
    _HUB_CHUNK_ROWS = 4096

    def __init__(self, config: Config):
        super().__init__(config)
        self.communication_scores = None
        self.hub_scores = None
        self.pathway_scores = None

    def fit(self, adata: 'ad.AnnData') -> 'CommunicationModel':
        """Fit the communication model.

        Args:
            adata: Dataset with cluster labels in ``adata.obs['leiden']``.
                It is only read, never modified.

        Returns:
            ``self``, with the three score tables and ``metadata`` populated.

        Raises:
            ImportError: If the scientific dependencies failed to import.
            ValueError: If ``adata.obs`` has no ``'leiden'`` column.
        """
        if not DEPS_AVAILABLE:
            raise ImportError("Required dependencies not available")

        # Ensure we have cell type annotations
        if 'leiden' not in adata.obs.columns:
            raise ValueError("Cell type annotations required. Run CellAnnotationModel first.")

        # No defensive copy: the helpers below only read from adata, and
        # duplicating the whole dataset would just cost memory.
        self.communication_scores = self._calculate_communication_specificity(adata)
        self.hub_scores = self._identify_hub_cells(adata)
        self.pathway_scores = self._calculate_pathway_scores(adata)

        self.is_fitted = True
        self.metadata = {
            'n_cells': adata.n_obs,
            'n_cell_types': len(adata.obs['leiden'].unique()),
            'communication_pairs': len(self.communication_scores)
        }

        return self

    def _calculate_communication_specificity(self, adata: 'ad.AnnData') -> 'pd.DataFrame':
        """Score every ordered pair of distinct cell types.

        The score is the absolute Pearson correlation between the two cell
        types' mean expression profiles (0 when the correlation is NaN).

        Returns:
            DataFrame with one row per pair and the columns ``source``,
            ``target`` and ``communication_score``.
        """
        labels = adata.obs['leiden']
        cell_types = labels.unique()

        # One mean profile per cell type, computed once instead of once per
        # pair. np.asarray(...).ravel() gives a 1-D array for dense and
        # sparse X alike (the mean over a sparse matrix is already dense).
        mean_profiles = {
            ct: np.asarray(adata[labels == ct].X.mean(axis=0)).ravel()
            for ct in cell_types
        }

        specificity_data = []
        for ct1 in cell_types:
            for ct2 in cell_types:
                if ct1 == ct2:
                    continue
                correlation = np.corrcoef(mean_profiles[ct1], mean_profiles[ct2])[0, 1]
                specificity_data.append({
                    'source': ct1,
                    'target': ct2,
                    'communication_score': abs(correlation) if not np.isnan(correlation) else 0
                })

        return pd.DataFrame(specificity_data)

    def _identify_hub_cells(self, adata: 'ad.AnnData') -> 'pd.Series':
        """Compute a hub score for every cell.

        The score of a cell is ``std * mean / (var + 1)`` taken over that
        cell's expression values.

        Returns:
            Series of scores indexed like ``adata.obs``.
        """
        matrix = adata.X
        chunk_scores = []

        # Work on blocks of rows: vectorised per block, while only
        # _HUB_CHUNK_ROWS cells are held densely at any time.
        for start in range(0, adata.n_obs, self._HUB_CHUNK_ROWS):
            block = matrix[start:start + self._HUB_CHUNK_ROWS]
            if issparse(block):
                block = block.toarray()
            block = np.asarray(block)
            chunk_scores.append(
                (block.std(axis=1) * block.mean(axis=1)) / (block.var(axis=1) + 1)
            )

        if not chunk_scores:
            return pd.Series([], index=adata.obs.index, dtype=float)
        return pd.Series(np.concatenate(chunk_scores), index=adata.obs.index)

    def _calculate_pathway_scores(self, adata: 'ad.AnnData') -> 'pd.DataFrame':
        """Calculate pathway activity scores per cell type.

        For each pathway with at least one gene present in
        ``adata.var_names``, the score of a cell type is the mean
        expression of the present genes over that cell type's cells.

        Returns:
            DataFrame with pathways as rows and cell types as columns;
            pathways without any present gene are omitted.
        """
        labels = adata.obs['leiden']
        cell_types = labels.unique()
        pathway_scores = {}

        for pathway, genes in self._HEART_PATHWAYS.items():
            present_genes = [g for g in genes if g in adata.var_names]
            if not present_genes:
                continue

            pathway_scores[pathway] = {
                ct: np.asarray(adata[labels == ct, present_genes].X.mean(axis=0)).mean()
                for ct in cell_types
            }

        return pd.DataFrame(pathway_scores).T

    def predict(self, adata: 'ad.AnnData') -> Dict[str, Any]:
        """Return the communication results computed by ``fit``.

        ``adata`` is not inspected; the stored tables are returned.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.is_fitted:
            raise ValueError("Model must be fitted before prediction")

        return {
            'communication_scores': self.communication_scores,
            'hub_scores': self.hub_scores,
            'pathway_scores': self.pathway_scores,
            'metadata': self.metadata
        }

    def _get_state(self) -> Dict[str, Any]:
        """Get model state for saving (pandas objects as plain dicts)."""
        return {
            'communication_scores': self.communication_scores.to_dict() if self.communication_scores is not None else None,
            'hub_scores': self.hub_scores.to_dict() if self.hub_scores is not None else None,
            'pathway_scores': self.pathway_scores.to_dict() if self.pathway_scores is not None else None
        }

    def _set_state(self, state: Dict[str, Any]) -> None:
        """Set model state from data produced by ``_get_state``."""
        self.communication_scores = pd.DataFrame(state['communication_scores']) if state['communication_scores'] else None
        self.hub_scores = pd.Series(state['hub_scores']) if state['hub_scores'] else None
        self.pathway_scores = pd.DataFrame(state['pathway_scores']) if state['pathway_scores'] else None
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
class MultiChamberModel(BaseModel):
|
| 335 |
+
"""Model for multi-chamber heart analysis"""
|
| 336 |
+
|
| 337 |
+
def __init__(self, config: Config):
|
| 338 |
+
super().__init__(config)
|
| 339 |
+
self.chamber_markers = None
|
| 340 |
+
self.chamber_expression = None
|
| 341 |
+
self.cross_chamber_correlations = None
|
| 342 |
+
|
| 343 |
+
def fit(self, adata: 'ad.AnnData') -> 'MultiChamberModel':
|
| 344 |
+
"""Fit multi-chamber model"""
|
| 345 |
+
if not DEPS_AVAILABLE:
|
| 346 |
+
raise ImportError("Required dependencies not available")
|
| 347 |
+
|
| 348 |
+
# Check for chamber information or create mock data
|
| 349 |
+
if 'chamber' not in adata.obs.columns:
|
| 350 |
+
warnings.warn("No chamber information found. Creating mock chamber assignments.")
|
| 351 |
+
chambers = ['LA', 'RA', 'LV', 'RV']
|
| 352 |
+
np.random.seed(self.config.data.random_seed)
|
| 353 |
+
adata.obs['chamber'] = np.random.choice(chambers, adata.n_obs)
|
| 354 |
+
|
| 355 |
+
chambers = adata.obs['chamber'].unique()
|
| 356 |
+
|
| 357 |
+
# Find chamber-specific markers
|
| 358 |
+
self.chamber_markers = {}
|
| 359 |
+
for chamber in chambers:
|
| 360 |
+
adata.obs['is_chamber'] = (adata.obs['chamber'] == chamber).astype(str)
|
| 361 |
+
sc.tl.rank_genes_groups(adata, groupby='is_chamber', method='wilcoxon')
|
| 362 |
+
markers = sc.get.rank_genes_groups_df(adata, group='True')
|
| 363 |
+
self.chamber_markers[chamber] = markers.head(20)
|
| 364 |
+
|
| 365 |
+
# Calculate chamber expression profiles
|
| 366 |
+
self.chamber_expression = {}
|
| 367 |
+
for chamber in chambers:
|
| 368 |
+
chamber_data = adata[adata.obs['chamber'] == chamber]
|
| 369 |
+
chamber_expr = chamber_data.X.mean(axis=0)
|
| 370 |
+
if issparse(chamber_expr):
|
| 371 |
+
chamber_expr = chamber_expr.A1
|
| 372 |
+
self.chamber_expression[chamber] = chamber_expr
|
| 373 |
+
|
| 374 |
+
# Calculate cross-chamber correlations
|
| 375 |
+
self.cross_chamber_correlations = self._calculate_cross_chamber_correlations()
|
| 376 |
+
|
| 377 |
+
self.is_fitted = True
|
| 378 |
+
self.metadata = {
|
| 379 |
+
'n_cells': adata.n_obs,
|
| 380 |
+
'n_chambers': len(chambers),
|
| 381 |
+
'chambers': list(chambers)
|
| 382 |
+
}
|
| 383 |
+
|
| 384 |
+
return self
|
| 385 |
+
|
| 386 |
+
def _calculate_cross_chamber_correlations(self) -> pd.DataFrame:
|
| 387 |
+
"""Calculate correlations between chambers"""
|
| 388 |
+
chambers = list(self.chamber_expression.keys())
|
| 389 |
+
correlations = np.zeros((len(chambers), len(chambers)))
|
| 390 |
+
|
| 391 |
+
for i, chamber1 in enumerate(chambers):
|
| 392 |
+
for j, chamber2 in enumerate(chambers):
|
| 393 |
+
expr1 = self.chamber_expression[chamber1]
|
| 394 |
+
expr2 = self.chamber_expression[chamber2]
|
| 395 |
+
correlations[i, j] = np.corrcoef(expr1, expr2)[0, 1]
|
| 396 |
+
|
| 397 |
+
return pd.DataFrame(correlations, index=chambers, columns=chambers)
|
| 398 |
+
|
| 399 |
+
def predict(self, adata: 'ad.AnnData') -> Dict[str, Any]:
    """Return the chamber-level results stored by ``fit``.

    ``adata`` is part of the common model interface but is not read here;
    the returned dictionary simply exposes the fitted attributes.

    Raises:
        ValueError: If the model has not been fitted yet.
    """
    if self.is_fitted:
        exported = (
            'chamber_markers',
            'chamber_expression',
            'cross_chamber_correlations',
            'metadata',
        )
        return {field: getattr(self, field) for field in exported}

    raise ValueError("Model must be fitted before prediction")
|
| 410 |
+
|
| 411 |
+
def _get_state(self) -> Dict[str, Any]:
|
| 412 |
+
"""Get model state for saving"""
|
| 413 |
+
return {
|
| 414 |
+
'chamber_markers': {k: v.to_dict() for k, v in self.chamber_markers.items()} if self.chamber_markers else None,
|
| 415 |
+
'chamber_expression': self.chamber_expression,
|
| 416 |
+
'cross_chamber_correlations': self.cross_chamber_correlations.to_dict() if self.cross_chamber_correlations is not None else None
|
| 417 |
+
}
|
| 418 |
+
|
| 419 |
+
def _set_state(self, state: Dict[str, Any]) -> None:
|
| 420 |
+
"""Set model state from loaded data"""
|
| 421 |
+
self.chamber_markers = {k: pd.DataFrame(v) for k, v in state['chamber_markers'].items()} if state['chamber_markers'] else None
|
| 422 |
+
self.chamber_expression = state['chamber_expression']
|
| 423 |
+
self.cross_chamber_correlations = pd.DataFrame(state['cross_chamber_correlations']) if state['cross_chamber_correlations'] else None
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
class HeartMapModel(BaseModel):
    """Composite model that runs annotation, communication and multi-chamber analysis in sequence."""

    def __init__(self, config: Config):
        super().__init__(config)
        self.annotation_model = CellAnnotationModel(config)
        self.communication_model = CommunicationModel(config)
        self.multi_chamber_model = MultiChamberModel(config)

    def _parts(self):
        """Return ``(result key, sub-model)`` pairs in processing order."""
        return (
            ('annotation', self.annotation_model),
            ('communication', self.communication_model),
            ('multi_chamber', self.multi_chamber_model),
        )

    def fit(self, adata: 'ad.AnnData') -> 'HeartMapModel':
        """Fit every sub-model on ``adata`` and return ``self``.

        The cluster labels of the annotation model are written to
        ``adata.obs['leiden']`` before the two downstream models are fitted.
        """
        annotator = self.annotation_model
        annotator.fit(adata)

        # Downstream models read the clustering from the AnnData object.
        adata.obs['leiden'] = annotator.cluster_labels

        for downstream in (self.communication_model, self.multi_chamber_model):
            downstream.fit(adata)

        self.is_fitted = True
        self.metadata = {key: part.metadata for key, part in self._parts()}

        return self

    def predict(self, adata: 'ad.AnnData') -> Dict[str, Any]:
        """Run ``predict`` on every sub-model and bundle the outputs.

        Raises:
            ValueError: If the model has not been fitted yet.
        """
        if not self.is_fitted:
            raise ValueError("Model must be fitted before prediction")

        combined = {key: part.predict(adata) for key, part in self._parts()}
        combined['metadata'] = self.metadata
        return combined

    def _get_state(self) -> Dict[str, Any]:
        """Gather the state of every sub-model under ``<key>_model``."""
        return {f'{key}_model': part._get_state() for key, part in self._parts()}

    def _set_state(self, state: Dict[str, Any]) -> None:
        """Hand each sub-model the state stored under ``<key>_model``."""
        for key, part in self._parts():
            part._set_state(state[f'{key}_model'])
|
src/heartmap/models/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (14 kB). View file
|
|
|
src/heartmap/pipelines/__init__.py
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Analysis pipelines for HeartMAP
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from abc import ABC, abstractmethod
|
| 6 |
+
from typing import Dict, Any, Optional, List, Tuple
|
| 7 |
+
import warnings
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
import scanpy as sc
|
| 12 |
+
import pandas as pd
|
| 13 |
+
import numpy as np
|
| 14 |
+
import anndata as ad
|
| 15 |
+
import matplotlib.pyplot as plt
|
| 16 |
+
import seaborn as sns
|
| 17 |
+
DEPS_AVAILABLE = True
|
| 18 |
+
except ImportError:
|
| 19 |
+
DEPS_AVAILABLE = False
|
| 20 |
+
warnings.warn("Some dependencies not available. Install requirements for full functionality.")
|
| 21 |
+
|
| 22 |
+
from ..config import Config
|
| 23 |
+
from ..data import DataProcessor
|
| 24 |
+
from ..models import CellAnnotationModel, CommunicationModel, MultiChamberModel, HeartMapModel
|
| 25 |
+
from ..utils import Visualizer, ResultsExporter
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class BasePipeline(ABC):
    """Common scaffolding shared by all HeartMAP pipelines.

    Holds the configuration plus the data processor, visualizer and exporter
    built from it, and the ``results`` dictionary filled in by ``run``.
    """

    def __init__(self, config: Config):
        self.config = config
        self.data_processor = DataProcessor(config)
        self.visualizer = Visualizer(config)
        self.exporter = ResultsExporter(config)
        self.results = {}

    @abstractmethod
    def run(self, data_path: str, output_dir: Optional[str] = None) -> Dict[str, Any]:
        """Execute the pipeline on ``data_path``; subclasses must implement this."""

    def save_results(self, output_dir: str) -> None:
        """Create ``output_dir`` if needed and export ``self.results`` into it."""
        destination = Path(output_dir)
        destination.mkdir(parents=True, exist_ok=True)
        self.exporter.export_results(self.results, destination)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class BasicPipeline(BasePipeline):
    """Baseline workflow: preprocess raw data, annotate cells, plot and export."""

    def __init__(self, config: Config):
        super().__init__(config)
        self.model = CellAnnotationModel(config)

    def _write_figures(self, adata, viz_dir: Path) -> None:
        """Save the cluster UMAP and the QC overview into ``viz_dir``."""
        viz_dir.mkdir(parents=True, exist_ok=True)

        sc.pl.umap(
            adata,
            color=['leiden'],
            legend_loc='on data',
            title='Cell Type Clusters',
            show=False,
        )
        plt.savefig(viz_dir / "umap_clusters.png", dpi=300, bbox_inches='tight')
        plt.close()

        self.visualizer.plot_qc_metrics(adata, viz_dir)

    def run(self, data_path: str, output_dir: Optional[str] = None) -> Dict[str, Any]:
        """Process ``data_path``, fit the annotation model and collect outputs.

        Figures, tables, the annotated ``.h5ad`` file and the pickled model
        are written only when ``output_dir`` is given.

        Returns:
            Dictionary with the keys ``adata``, ``model`` and ``results``.

        Raises:
            ImportError: If the optional analysis dependencies are missing.
        """
        if not DEPS_AVAILABLE:
            raise ImportError("Required dependencies not available")

        print("=== Running Basic Pipeline ===")

        print("1. Loading and processing data...")
        adata = self.data_processor.process_from_raw(data_path)

        print("2. Performing cell annotation...")
        self.model.fit(adata)
        results = self.model.predict(adata)
        adata.obs['leiden'] = results['cluster_labels']

        print("3. Generating visualizations...")
        if output_dir:
            self._write_figures(adata, Path(output_dir) / "figures")

        self.results = dict(adata=adata, model=self.model, results=results)

        if output_dir:
            out_root = Path(output_dir)
            self.save_results(output_dir)
            adata.write(out_root / "annotated_data.h5ad")
            self.model.save(out_root / "annotation_model.pkl")

        print("Basic pipeline completed!")
        return self.results
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
class AdvancedCommunicationPipeline(BasePipeline):
    """Cell-cell communication workflow for data that already carries cluster labels."""

    def __init__(self, config: Config):
        super().__init__(config)
        self.model = CommunicationModel(config)

    def _write_figures(self, adata, results: Dict[str, Any], viz_dir: Path) -> None:
        """Save the heatmap, hub-score and pathway figures into ``viz_dir``."""
        viz_dir.mkdir(parents=True, exist_ok=True)
        plotter = self.visualizer

        plotter.plot_communication_heatmap(results['communication_scores'], viz_dir)
        plotter.plot_hub_scores(adata, results['hub_scores'], viz_dir)
        plotter.plot_pathway_scores(results['pathway_scores'], viz_dir)

    def run(self, data_path: str, output_dir: Optional[str] = None) -> Dict[str, Any]:
        """Read an annotated ``.h5ad`` file and analyse communication.

        Figures, tables and the pickled model are written only when
        ``output_dir`` is given.

        Returns:
            Dictionary with the keys ``adata``, ``model`` and ``results``.

        Raises:
            ImportError: If the optional analysis dependencies are missing.
            ValueError: If the input lacks a ``leiden`` column in ``obs``.
        """
        if not DEPS_AVAILABLE:
            raise ImportError("Required dependencies not available")

        print("=== Running Advanced Communication Pipeline ===")

        print("1. Loading annotated data...")
        adata = sc.read_h5ad(data_path)

        has_clusters = 'leiden' in adata.obs.columns
        if not has_clusters:
            raise ValueError("Input data must have cell type annotations. Run BasicPipeline first.")

        print("2. Analyzing cell-cell communication...")
        self.model.fit(adata)
        results = self.model.predict(adata)

        print("3. Generating communication visualizations...")
        if output_dir:
            self._write_figures(adata, results, Path(output_dir) / "figures")

        self.results = dict(adata=adata, model=self.model, results=results)

        if output_dir:
            self.save_results(output_dir)
            self.model.save(Path(output_dir) / "communication_model.pkl")

        print("Advanced communication pipeline completed!")
        return self.results
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
class MultiChamberPipeline(BasePipeline):
    """Workflow comparing expression across heart chambers."""

    def __init__(self, config: Config):
        super().__init__(config)
        self.model = MultiChamberModel(config)

    def _write_figures(self, adata, results: Dict[str, Any], viz_dir: Path) -> None:
        """Save the composition, marker and correlation figures into ``viz_dir``."""
        viz_dir.mkdir(parents=True, exist_ok=True)
        plotter = self.visualizer

        plotter.plot_chamber_composition(adata, viz_dir)
        plotter.plot_chamber_markers(results['chamber_markers'], viz_dir)
        plotter.plot_cross_chamber_correlations(results['cross_chamber_correlations'], viz_dir)

    def run(self, data_path: str, output_dir: Optional[str] = None) -> Dict[str, Any]:
        """Read an ``.h5ad`` file and fit the multi-chamber model on it.

        Figures, tables and the pickled model are written only when
        ``output_dir`` is given.

        Returns:
            Dictionary with the keys ``adata``, ``model`` and ``results``.

        Raises:
            ImportError: If the optional analysis dependencies are missing.
        """
        if not DEPS_AVAILABLE:
            raise ImportError("Required dependencies not available")

        print("=== Running Multi-Chamber Pipeline ===")

        print("1. Loading data...")
        adata = sc.read_h5ad(data_path)

        print("2. Analyzing multi-chamber patterns...")
        self.model.fit(adata)
        results = self.model.predict(adata)

        print("3. Generating multi-chamber visualizations...")
        if output_dir:
            self._write_figures(adata, results, Path(output_dir) / "figures")

        self.results = dict(adata=adata, model=self.model, results=results)

        if output_dir:
            self.save_results(output_dir)
            self.model.save(Path(output_dir) / "multi_chamber_model.pkl")

        print("Multi-chamber pipeline completed!")
        return self.results
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
class ComprehensivePipeline(BasePipeline):
    """End-to-end workflow built around the combined ``HeartMapModel``."""

    def __init__(self, config: Config):
        super().__init__(config)
        self.model = HeartMapModel(config)

    def run(self, data_path: str, output_dir: Optional[str] = None) -> Dict[str, Any]:
        """Process ``data_path``, fit the combined model and collect outputs.

        Cluster labels and hub scores from the prediction are copied into
        ``adata.obs``. The dashboard, tables, ``.h5ad`` file, pickled model
        and markdown report are written only when ``output_dir`` is given.

        Returns:
            Dictionary with the keys ``adata``, ``model`` and ``results``.

        Raises:
            ImportError: If the optional analysis dependencies are missing.
        """
        if not DEPS_AVAILABLE:
            raise ImportError("Required dependencies not available")

        print("=== Running Comprehensive HeartMAP Pipeline ===")

        print("1. Loading and processing data...")
        adata = self.data_processor.process_from_raw(data_path)

        print("2. Fitting comprehensive HeartMAP model...")
        self.model.fit(adata)
        results = self.model.predict(adata)

        # Mirror the per-cell outputs onto the AnnData object.
        adata.obs['leiden'] = results['annotation']['cluster_labels']
        adata.obs['hub_score'] = results['communication']['hub_scores']

        print("3. Generating comprehensive visualizations...")
        if output_dir:
            figure_dir = Path(output_dir) / "figures"
            figure_dir.mkdir(parents=True, exist_ok=True)
            self.visualizer.create_comprehensive_dashboard(adata, results, figure_dir)

        self.results = dict(adata=adata, model=self.model, results=results)

        if output_dir:
            out_root = Path(output_dir)
            self.save_results(output_dir)
            adata.write(out_root / "heartmap_complete.h5ad")
            self.model.save(out_root / "heartmap_model.pkl")
            self.exporter.generate_comprehensive_report(self.results, output_dir)

        print("Comprehensive HeartMAP pipeline completed!")
        return self.results
|
src/heartmap/pipelines/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (7.26 kB). View file
|
|
|
src/heartmap/utils/__init__.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utility functions and classes for HeartMAP
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import hashlib
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Dict, Any, Optional, List
|
| 9 |
+
import warnings
|
| 10 |
+
|
| 11 |
+
try:
|
| 12 |
+
import matplotlib.pyplot as plt
|
| 13 |
+
import seaborn as sns
|
| 14 |
+
import pandas as pd
|
| 15 |
+
import numpy as np
|
| 16 |
+
import scanpy as sc
|
| 17 |
+
import plotly.express as px
|
| 18 |
+
import plotly.graph_objects as go
|
| 19 |
+
from plotly.subplots import make_subplots
|
| 20 |
+
PLOTTING_AVAILABLE = True
|
| 21 |
+
except ImportError:
|
| 22 |
+
PLOTTING_AVAILABLE = False
|
| 23 |
+
warnings.warn("Plotting dependencies not available")
|
| 24 |
+
|
| 25 |
+
from ..config import Config
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class Visualizer:
    """Plotting helpers that write HeartMAP figures as PNG files.

    Every plotting method returns without drawing anything when the optional
    plotting stack could not be imported (``PLOTTING_AVAILABLE`` is False).
    pandas types in the signatures are written as string annotations so that
    defining this class does not itself require pandas to be importable.
    """

    def __init__(self, config: Config):
        self.config = config

    @staticmethod
    def _save_and_close(save_dir, filename: str) -> None:
        """Write the current matplotlib figure to ``save_dir/filename`` and close it."""
        plt.savefig(Path(save_dir) / filename, dpi=300, bbox_inches='tight')
        plt.close()

    def plot_qc_metrics(self, adata, save_dir: Path) -> None:
        """Plot quality-control distributions to ``qc_metrics.png``.

        Reads ``n_genes`` and ``total_counts`` from ``adata.obs``; the
        mitochondrial panel is filled only if ``pct_counts_mt`` is present.
        """
        if not PLOTTING_AVAILABLE:
            return

        _, axes = plt.subplots(2, 2, figsize=(12, 10))

        # Number of genes
        axes[0, 0].hist(adata.obs['n_genes'], bins=50, alpha=0.7)
        axes[0, 0].set_xlabel('Number of genes')
        axes[0, 0].set_ylabel('Number of cells')
        axes[0, 0].set_title('Genes per cell')

        # Total counts
        axes[0, 1].hist(adata.obs['total_counts'], bins=50, alpha=0.7)
        axes[0, 1].set_xlabel('Total counts')
        axes[0, 1].set_ylabel('Number of cells')
        axes[0, 1].set_title('UMI per cell')

        # Mitochondrial percentage
        if 'pct_counts_mt' in adata.obs.columns:
            axes[1, 0].hist(adata.obs['pct_counts_mt'], bins=50, alpha=0.7)
            axes[1, 0].set_xlabel('Mitochondrial %')
            axes[1, 0].set_ylabel('Number of cells')
            axes[1, 0].set_title('Mitochondrial gene %')

        # Scatter plot
        axes[1, 1].scatter(adata.obs['total_counts'], adata.obs['n_genes'], alpha=0.6, s=1)
        axes[1, 1].set_xlabel('Total counts')
        axes[1, 1].set_ylabel('Number of genes')
        axes[1, 1].set_title('Genes vs UMI')

        plt.tight_layout()
        self._save_and_close(save_dir, "qc_metrics.png")

    def plot_communication_heatmap(self, comm_scores: 'pd.DataFrame', save_dir: Path) -> None:
        """Plot a source-by-target heatmap to ``communication_heatmap.png``.

        ``comm_scores`` must provide the columns ``source``, ``target`` and
        ``communication_score``.
        """
        if not PLOTTING_AVAILABLE:
            return

        # Create pivot table
        pivot_df = comm_scores.pivot(
            index='source',
            columns='target',
            values='communication_score'
        )

        plt.figure(figsize=(10, 8))
        sns.heatmap(pivot_df, annot=True, cmap='viridis', fmt='.3f')
        plt.title('Cell-Cell Communication Specificity')
        plt.tight_layout()
        self._save_and_close(save_dir, "communication_heatmap.png")

    def plot_hub_scores(self, adata, hub_scores: 'pd.Series', save_dir: Path) -> None:
        """Colour the UMAP by hub score and save ``hub_scores.png``.

        Note: ``hub_scores`` is stored in ``adata.obs['hub_score']`` as a
        side effect, because scanpy colours by ``obs`` columns.
        """
        if not PLOTTING_AVAILABLE:
            return

        # Add hub scores to adata for plotting
        adata.obs['hub_score'] = hub_scores

        sc.pl.umap(adata, color='hub_score', title='Communication Hub Score', show=False)
        self._save_and_close(save_dir, "hub_scores.png")

    def plot_pathway_scores(self, pathway_scores: 'pd.DataFrame', save_dir: Path) -> None:
        """Plot pathway activity as a heatmap to ``pathway_scores.png``.

        Nothing is drawn for an empty table.
        """
        if not PLOTTING_AVAILABLE or pathway_scores.empty:
            return

        plt.figure(figsize=(12, 8))
        sns.heatmap(pathway_scores, annot=True, cmap='Blues', fmt='.3f')
        plt.title('Pathway Activity by Cell Type')
        plt.tight_layout()
        self._save_and_close(save_dir, "pathway_scores.png")

    def plot_chamber_composition(self, adata, save_dir: Path) -> None:
        """Plot cell counts per chamber (bar and pie) to ``chamber_composition.png``.

        Nothing is drawn when ``adata.obs`` has no ``chamber`` column.
        """
        if not PLOTTING_AVAILABLE:
            return

        if 'chamber' not in adata.obs.columns:
            return

        chamber_counts = adata.obs['chamber'].value_counts()

        _, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Bar plot
        chamber_counts.plot(kind='bar', ax=ax1)
        ax1.set_title('Cell Counts by Chamber')
        ax1.set_xlabel('Chamber')
        ax1.set_ylabel('Number of Cells')

        # Pie chart
        ax2.pie(chamber_counts.values, labels=chamber_counts.index, autopct='%1.1f%%')
        ax2.set_title('Chamber Proportions')

        plt.tight_layout()
        self._save_and_close(save_dir, "chamber_composition.png")

    def plot_chamber_markers(self, chamber_markers: Dict, save_dir: Path) -> None:
        """Plot the top ten markers per chamber to ``chamber_markers.png``.

        The figure is a fixed 2x2 grid, so only the first four entries of
        ``chamber_markers`` are shown. Each value must be a table with the
        columns ``names`` and ``pvals_adj``; empty tables leave their panel
        blank.
        """
        if not PLOTTING_AVAILABLE:
            return

        _, axes = plt.subplots(2, 2, figsize=(15, 12))
        axes = axes.flatten()

        # Adjusted p-values that underflow to exactly 0 would give
        # -log10(0) = inf and an unplottable bar; clamp them to the smallest
        # positive normal float instead.
        smallest_positive = np.finfo(float).tiny

        # zip() stops after the four available panels.
        for ax, (chamber, markers) in zip(axes, chamber_markers.items()):
            if markers.empty:
                continue
            top_10 = markers.head(10)
            adjusted = np.clip(top_10['pvals_adj'].to_numpy(dtype=float), smallest_positive, None)
            positions = range(len(top_10))
            ax.barh(positions, -np.log10(adjusted))
            ax.set_yticks(positions)
            ax.set_yticklabels(top_10['names'])
            ax.set_xlabel('-log10(adjusted p-value)')
            ax.set_title(f'Top Markers - {chamber}')

        plt.tight_layout()
        self._save_and_close(save_dir, "chamber_markers.png")

    def plot_cross_chamber_correlations(self, correlations: 'pd.DataFrame', save_dir: Path) -> None:
        """Plot the chamber correlation matrix to ``cross_chamber_correlations.png``."""
        if not PLOTTING_AVAILABLE:
            return

        plt.figure(figsize=(8, 6))
        sns.heatmap(correlations, annot=True, cmap='coolwarm', center=0, fmt='.3f')
        plt.title('Cross-Chamber Expression Correlations')
        plt.tight_layout()
        self._save_and_close(save_dir, "cross_chamber_correlations.png")

    def create_comprehensive_dashboard(self, adata, results: Dict, save_dir: Path) -> None:
        """Assemble the overview figure ``comprehensive_dashboard.png``.

        Panels: UMAP coloured by ``leiden``, UMAP coloured by ``hub_score``
        and, when ``adata.obs`` has a ``chamber`` column, a chamber pie chart.
        ``results`` is accepted for interface compatibility but not read.
        """
        if not PLOTTING_AVAILABLE:
            return

        # Create a large multi-panel figure
        plt.figure(figsize=(20, 16))

        # Panel 1: UMAP with clusters
        ax1 = plt.subplot(3, 3, 1)
        sc.pl.umap(adata, color='leiden', ax=ax1, show=False, frameon=False)
        ax1.set_title('Cell Type Clusters')

        # Panel 2: UMAP with hub scores
        ax2 = plt.subplot(3, 3, 2)
        sc.pl.umap(adata, color='hub_score', ax=ax2, show=False, frameon=False)
        ax2.set_title('Communication Hubs')

        # Panel 3: Chamber composition (if available)
        if 'chamber' in adata.obs.columns:
            ax3 = plt.subplot(3, 3, 3)
            chamber_counts = adata.obs['chamber'].value_counts()
            ax3.pie(chamber_counts.values, labels=chamber_counts.index, autopct='%1.1f%%')
            ax3.set_title('Chamber Distribution')

        # Additional panels for other analyses...

        plt.tight_layout()
        self._save_and_close(save_dir, "comprehensive_dashboard.png")
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
class ResultsExporter:
    """Write analysis results to disk as CSV tables and a markdown report."""

    def __init__(self, config: Config):
        self.config = config

    def export_results(self, results: Dict, output_dir: Path) -> None:
        """Export the tabular results found under ``results['results']``.

        Creates ``output_dir/tables`` and writes ``marker_genes.csv`` and
        ``communication_scores.csv`` (the latter without its index) for
        whichever of those entries exist and are not ``None``.

        Args:
            results: Pipeline result dictionary.
            output_dir: Destination directory; created when missing.
        """
        tables_dir = Path(output_dir) / "tables"
        tables_dir.mkdir(parents=True, exist_ok=True)

        payload = results.get('results') or {}

        marker_genes = payload.get('marker_genes')
        if marker_genes is not None:
            marker_genes.to_csv(tables_dir / "marker_genes.csv")

        comm_scores = payload.get('communication_scores')
        if comm_scores is not None:
            comm_scores.to_csv(tables_dir / "communication_scores.csv", index=False)

    def generate_comprehensive_report(self, results: Dict, output_dir: str) -> None:
        """Write ``analysis_report.md`` summarising the analysed dataset.

        The report lists cell and gene totals, the five largest ``leiden``
        clusters and the ``chamber`` distribution (each only when the
        column exists in ``adata.obs``), followed by fixed closing sections.
        Nothing is written when ``results`` has no ``adata`` entry.

        Args:
            results: Pipeline result dictionary; ``results['adata']`` is read.
            output_dir: Directory that receives the report; created when
                missing.
        """
        adata = results.get('adata')
        if adata is None:
            return

        n_cells = adata.n_obs
        n_genes = adata.n_vars

        sections = [f"""# HeartMAP Analysis Report

## Dataset Overview
- **Total Cells**: {n_cells:,}
- **Total Genes**: {n_genes:,}

## Analysis Components Completed
- ✅ Cell type annotation
- ✅ Cell-cell communication analysis
- ✅ Multi-chamber analysis

## Key Findings

### Cell Type Annotation
"""]

        if 'leiden' in adata.obs.columns:
            cluster_counts = adata.obs['leiden'].value_counts()
            sections.append(f"- **Number of cell types identified**: {len(cluster_counts)}\n")
            sections.append("- **Cell type distribution**:\n")
            # Only the five most populous clusters are listed.
            for cluster, count in cluster_counts.head(5).items():
                pct = 100 * count / n_cells
                sections.append(f" - Cluster {cluster}: {count:,} cells ({pct:.1f}%)\n")

        if 'chamber' in adata.obs.columns:
            chamber_counts = adata.obs['chamber'].value_counts()
            sections.append("\n### Chamber Distribution\n")
            for chamber, count in chamber_counts.items():
                pct = 100 * count / n_cells
                sections.append(f"- **{chamber}**: {count:,} cells ({pct:.1f}%)\n")

        sections.append("""
### Communication Analysis
- Cell-cell communication patterns identified
- Communication hub cells detected
- Pathway activity scores calculated

## Files Generated
- `heartmap_complete.h5ad`: Complete processed dataset
- `heartmap_model.pkl`: Trained HeartMAP model
- `figures/`: All visualization outputs
- `tables/`: Exported data tables

## Next Steps
1. Validate findings with literature
2. Investigate specific cell type interactions
3. Apply model to new datasets
""")

        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        # The report contains non-ASCII characters (the check marks), so the
        # encoding is fixed rather than left to the platform default, which
        # would fail on e.g. cp1252 consoles.
        with open(output_path / "analysis_report.md", 'w', encoding='utf-8') as f:
            f.write("".join(sections))
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
class ChecksumValidator:
    """Validate data integrity using SHA-256 checksums."""

    @staticmethod
    def calculate_sha256(file_path: str) -> str:
        """Return the hexadecimal SHA-256 digest of the file at ``file_path``.

        The file is read in fixed-size chunks, so it never has to fit in
        memory as a whole.
        """
        sha256_hash = hashlib.sha256()
        with open(file_path, "rb") as f:
            for byte_block in iter(lambda: f.read(65536), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()

    @staticmethod
    def verify_file(file_path: str, expected_checksum: str) -> bool:
        """Return True when the file's SHA-256 digest matches ``expected_checksum``.

        Hex digests are case-insensitive, so the expected value is lowercased
        and stripped of surrounding whitespace before comparison.
        """
        actual_checksum = ChecksumValidator.calculate_sha256(file_path)
        return actual_checksum == expected_checksum.strip().lower()

    @staticmethod
    def generate_checksums(directory: str, output_file: str) -> None:
        """Write a checksum manifest for every file below ``directory``.

        Each manifest line is ``<sha256> <path relative to directory>``.
        Files are listed in sorted path order so the manifest is reproducible
        across runs and filesystems. The manifest itself is skipped when it
        lives inside ``directory``, since its recorded hash could never match
        the file being written.
        """
        root = Path(directory)
        manifest_path = Path(output_file)
        manifest_resolved = manifest_path.resolve()

        lines = []
        for file_path in sorted(root.rglob('*')):
            if not file_path.is_file():
                continue
            if file_path.resolve() == manifest_resolved:
                continue
            checksum = ChecksumValidator.calculate_sha256(str(file_path))
            lines.append(f"{checksum} {file_path.relative_to(root)}\n")

        # Save checksums; UTF-8 keeps non-ASCII file names portable.
        with open(manifest_path, 'w', encoding='utf-8') as f:
            f.writelines(lines)
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
def setup_logging(level: str = "INFO", log_file: str = "heartmap.log") -> None:
    """Configure root logging to the console and to a log file.

    Args:
        level: Name of a ``logging`` level constant (case-insensitive),
            e.g. ``"INFO"`` or ``"debug"``.
        log_file: Path of the log file. Defaults to ``heartmap.log`` in the
            current working directory, the location that used to be
            hard-coded.

    Raises:
        AttributeError: If ``level`` does not name an attribute of the
            ``logging`` module.
    """
    import logging

    # Resolve the level first so that an invalid name fails before any
    # handler (and therefore any log file) is created.
    numeric_level = getattr(logging, level.upper())

    logging.basicConfig(
        level=numeric_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.StreamHandler(),
            logging.FileHandler(log_file, encoding='utf-8')
        ]
    )
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
def create_logger(name: str):
    """Return the ``logging`` logger registered under ``name``."""
    from logging import getLogger

    return getLogger(name)
|
src/heartmap/utils/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (11.5 kB). View file
|
|
|