Spaces:
Sleeping
Sleeping
"""
Ligand-Receptor Database Loader for HeartMAP

Uses LIANA's curated resources (consensus, CellPhoneDB, OmniPath, etc.) and
falls back to a built-in cardiac-focused list when LIANA cannot be imported.
"""
| import pandas as pd | |
| from typing import List, Tuple, Optional | |
# Optional dependency: LIANA supplies the curated L-R resources. When it cannot
# be imported, LIANA_AVAILABLE is False and a built-in fallback list is used.
try:
    import liana
except ImportError:
    LIANA_AVAILABLE = False
    print("⚠ LIANA not available - will use fallback database")
else:
    LIANA_AVAILABLE = True
    # getattr keeps a build without __version__ from raising AttributeError
    # here, which the ImportError handler above would not catch.
    print(f"✓ LIANA v{getattr(liana, '__version__', 'unknown')} loaded successfully")
class LigandReceptorDatabase:
    """
    Manage a ligand-receptor (L-R) interaction table.

    The table is loaded once, at construction time: from LIANA when the
    module-level ``LIANA_AVAILABLE`` flag is true, otherwise from a built-in
    cardiac-focused fallback list.

    Attributes:
    -----------
    resource : str
        Resource name requested by the caller (passed to LIANA unchanged).
    lr_pairs : pandas.DataFrame
        One row per pair, with columns ``ligand``, ``receptor``, ``confidence``.
    """

    def __init__(self, resource: str = 'consensus'):
        """
        Initialize L-R database

        Parameters:
        -----------
        resource : str
            Database to use:
            - 'consensus': curated from multiple sources (recommended)
            - 'cellphonedb': CellPhoneDB database
            - 'omnipath': OmniPath database
            - 'connectome': Ramilowski 2015
            - 'cellinker': CellLinker database
            NOTE(review): the name is handed to LIANA's ``select_resource``
            as-is; confirm each name against the installed LIANA version.
            An unknown name is not fatal - loading falls back to the
            built-in list.
        """
        self.resource = resource
        self.lr_pairs = None
        self.load_database()

    def load_database(self):
        """Load L-R database from LIANA or fallback"""
        if LIANA_AVAILABLE:
            self._load_from_liana()
        else:
            self._load_fallback_database()

    @staticmethod
    def _standardize_liana_frame(lr_df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Reduce a LIANA resource table to ligand/receptor/confidence columns.

        Returns ``None`` when ``lr_df`` has no ``ligand`` or ``receptor``
        column. Otherwise returns an independent frame holding the first
        occurrence of every (ligand, receptor) pair. ``confidence`` comes from
        the ``score`` column if present, else from ``confidence``, else it is
        set to 1.0 for every row.
        """
        if 'ligand' not in lr_df.columns or 'receptor' not in lr_df.columns:
            return None

        # 'score' takes precedence over 'confidence' when both are present.
        score_col = next(
            (col for col in ('score', 'confidence') if col in lr_df.columns),
            None,
        )
        wanted = ['ligand', 'receptor']
        if score_col is not None:
            wanted.append(score_col)

        # Carry the score through de-duplication and finish with an explicit
        # copy, so the column is never assigned onto a filtered frame (the
        # pattern pandas reports as SettingWithCopyWarning).
        pairs = lr_df[wanted].drop_duplicates(subset=['ligand', 'receptor']).copy()

        if score_col is None:
            pairs['confidence'] = 1.0  # Default high confidence
        elif score_col != 'confidence':
            pairs = pairs.rename(columns={score_col: 'confidence'})
        return pairs

    def _load_from_liana(self):
        """Load curated L-R pairs from LIANA, falling back on any failure"""
        try:
            from liana.resource import select_resource

            pairs = self._standardize_liana_frame(select_resource(self.resource))
        except Exception as e:
            # Deliberately broad: whatever goes wrong inside LIANA (unknown
            # resource, changed API, ...) must not stop the loader.
            print(f" Error loading LIANA database: {e}")
            self._load_fallback_database()
            return

        if pairs is None:
            print(" Unexpected LIANA format, using fallback")
            self._load_fallback_database()
            return

        self.lr_pairs = pairs
        print(f"✓ Loaded {len(self.lr_pairs)} L-R pairs from LIANA {self.resource}")

    def _load_fallback_database(self):
        """Fallback: built-in cardiac-focused L-R pairs"""
        # Cardiac-relevant pairs (89 interactions), each given as
        # (ligand, receptor, confidence).
        cardiac_lr_data = [
            # Angiogenesis & Vascular
            ('VEGFA', 'FLT1', 0.95), ('VEGFA', 'KDR', 0.98), ('VEGFA', 'NRP1', 0.85),
            ('VEGFB', 'FLT1', 0.90), ('VEGFC', 'FLT4', 0.95),
            ('ANGPT1', 'TEK', 0.92), ('ANGPT2', 'TEK', 0.88),
            ('PGF', 'FLT1', 0.85),
            # TGF-beta superfamily
            ('TGFB1', 'TGFBR1', 0.98), ('TGFB1', 'TGFBR2', 0.98),
            ('TGFB2', 'TGFBR1', 0.95), ('TGFB3', 'TGFBR2', 0.95),
            ('BMP2', 'BMPR1A', 0.90), ('BMP2', 'BMPR2', 0.88),
            ('BMP4', 'BMPR1A', 0.92), ('BMP7', 'BMPR2', 0.85),
            ('INHBA', 'ACVR1B', 0.80),
            # FGF signaling
            ('FGF1', 'FGFR1', 0.90), ('FGF2', 'FGFR1', 0.95),
            ('FGF2', 'FGFR2', 0.92), ('FGF7', 'FGFR2', 0.88),
            ('FGF9', 'FGFR3', 0.85), ('FGF10', 'FGFR2', 0.87),
            # PDGF signaling
            ('PDGFA', 'PDGFRA', 0.98), ('PDGFB', 'PDGFRB', 0.98),
            ('PDGFC', 'PDGFRA', 0.90), ('PDGFD', 'PDGFRB', 0.88),
            # Inflammatory cytokines
            ('IL6', 'IL6R', 0.98), ('IL1B', 'IL1R1', 0.95),
            ('TNF', 'TNFRSF1A', 0.98), ('TNF', 'TNFRSF1B', 0.95),
            ('IFNG', 'IFNGR1', 0.92), ('IL10', 'IL10RA', 0.90),
            ('IL4', 'IL4R', 0.88), ('IL13', 'IL13RA1', 0.85),
            # Chemokines
            ('CXCL12', 'CXCR4', 0.98), ('CXCL12', 'CXCR7', 0.85),
            ('CCL2', 'CCR2', 0.95), ('CCL5', 'CCR5', 0.92),
            ('CXCL8', 'CXCR1', 0.90), ('CXCL8', 'CXCR2', 0.88),
            ('CCL3', 'CCR1', 0.85), ('CCL4', 'CCR5', 0.87),
            # Growth factors
            ('EGF', 'EGFR', 0.98), ('HBEGF', 'EGFR', 0.92),
            ('IGF1', 'IGF1R', 0.98), ('IGF2', 'IGF1R', 0.95),
            ('HGF', 'MET', 0.95), ('NGF', 'NTRK1', 0.92),
            # Notch signaling
            ('DLL1', 'NOTCH1', 0.90), ('DLL4', 'NOTCH1', 0.92),
            ('JAG1', 'NOTCH1', 0.88), ('JAG1', 'NOTCH2', 0.85),
            ('JAG2', 'NOTCH3', 0.82),
            # Wnt signaling
            ('WNT3A', 'FZD1', 0.85), ('WNT3A', 'FZD2', 0.83),
            ('WNT5A', 'FZD5', 0.88), ('WNT7A', 'FZD7', 0.85),
            # Extracellular matrix
            ('COL1A1', 'ITGA1', 0.90), ('COL1A1', 'ITGA2', 0.88),
            ('FN1', 'ITGA5', 0.95), ('FN1', 'ITGB1', 0.92),
            ('LAMB1', 'ITGA6', 0.88), ('THBS1', 'CD47', 0.85),
            # Cardiac specific
            ('NRG1', 'ERBB2', 0.95), ('NRG1', 'ERBB4', 0.92),
            ('EDN1', 'EDNRA', 0.95), ('EDN1', 'EDNRB', 0.90),
            ('NPPA', 'NPR1', 0.92), ('NPPB', 'NPR1', 0.90),
            # Semaphorins
            ('SEMA3A', 'NRP1', 0.88), ('SEMA3C', 'NRP2', 0.85),
            ('SEMA4D', 'PLXNB1', 0.82),
            # Ephrins
            ('EFNA1', 'EPHA2', 0.90), ('EFNB2', 'EPHB4', 0.92),
            # Complement
            ('C3', 'C3AR1', 0.88), ('C5', 'C5AR1', 0.90),
            # Adhesion
            ('ICAM1', 'ITGAL', 0.92), ('VCAM1', 'ITGA4', 0.90),
            ('CD34', 'SELP', 0.85), ('PECAM1', 'PECAM1', 0.88),
            # Apoptosis
            ('FASLG', 'FAS', 0.95), ('TNFSF10', 'TNFRSF10A', 0.90),
            # Neuropeptides
            ('BDNF', 'NTRK2', 0.88), ('NTF3', 'NTRK3', 0.85),
            # Metabolic
            ('LEP', 'LEPR', 0.92), ('ADIPOQ', 'ADIPOR1', 0.88),
            ('INS', 'INSR', 0.98), ('GCG', 'GCGR', 0.90),
        ]
        self.lr_pairs = pd.DataFrame(
            cardiac_lr_data,
            columns=['ligand', 'receptor', 'confidence']
        )
        print(f"✓ Loaded {len(self.lr_pairs)} L-R pairs from fallback cardiac database")

    def get_pairs(self, confidence_threshold: float = 0.0, present_in_data: Optional[List[str]] = None) -> List[Tuple[str, str]]:
        """
        Get L-R pairs as list of tuples

        Parameters:
        -----------
        confidence_threshold : float
            Minimum confidence score; rows with ``confidence >= threshold``
            are kept
        present_in_data : list of str, optional
            Gene names present in dataset. When given, a pair is kept only if
            both its ligand and its receptor are in this collection; ``None``
            disables the filter.

        Returns:
        --------
        list of tuples: [(ligand, receptor), ...]
        """
        # Filter by confidence
        filtered = self.lr_pairs[self.lr_pairs['confidence'] >= confidence_threshold]
        # Filter by gene availability
        if present_in_data is not None:
            present_set = set(present_in_data)
            filtered = filtered[
                filtered['ligand'].isin(present_set) &
                filtered['receptor'].isin(present_set)
            ]
        return list(zip(filtered['ligand'], filtered['receptor']))

    def get_dataframe(self, confidence_threshold: float = 0.0) -> pd.DataFrame:
        """Get L-R pairs with ``confidence >= confidence_threshold`` as a DataFrame copy"""
        return self.lr_pairs[self.lr_pairs['confidence'] >= confidence_threshold].copy()

    def save_to_csv(self, filepath: str):
        """Save the full (unfiltered) database to CSV, without the index column"""
        self.lr_pairs.to_csv(filepath, index=False)
        print(f"✓ Saved L-R database to {filepath}")
| # Convenience function | |
def get_ligand_receptor_pairs(adata, resource: str = 'consensus', confidence_threshold: float = 0.7) -> List[Tuple[str, str]]:
    """
    Build an L-R database and return the pairs usable with ``adata``

    Parameters:
    -----------
    adata : AnnData
        Annotated data object; its ``var_names`` supply the available genes
    resource : str
        Database to use ('consensus', 'cellphonedb', etc.)
    confidence_threshold : float
        Minimum confidence a pair must have to be returned

    Returns:
    --------
    list of tuples: [(ligand, receptor), ...] where both genes are in
    ``adata.var_names``
    """
    database = LigandReceptorDatabase(resource=resource)
    matched = database.get_pairs(
        confidence_threshold=confidence_threshold,
        present_in_data=adata.var_names.tolist(),
    )
    # "total" is the size of the whole database, before any filtering.
    print(f" Found {len(matched)} L-R pairs present in dataset (from {len(database.lr_pairs)} total)")
    return matched
if __name__ == "__main__":
    # Manual smoke test: load a database, show a sample, filter it, export it.
    print("Testing L-R Database Loader...")
    print("=" * 60)
    # Uses LIANA if available, otherwise the built-in fallback list
    db = LigandReceptorDatabase(resource='consensus')
    print(f"\nTotal pairs: {len(db.lr_pairs)}")
    print("\nSample pairs:")
    print(db.lr_pairs.head(10))
    # Test filtering; get_pairs keeps confidence >= threshold, so the label
    # says ">=" rather than ">".
    high_conf_pairs = db.get_pairs(confidence_threshold=0.9)
    print(f"\nHigh confidence pairs (>=0.9): {len(high_conf_pairs)}")
    # Save example (written to the current working directory)
    db.save_to_csv("lr_database_export.csv")