Spaces:

Tumo505
/

heartmap-cell-analysis

Sleeping

App Files Files Community

heartmap-cell-analysis / src /heartmap /data /lr_database.py

Tumo505

update LR database

b101933 6 months ago

raw

history blame contribute delete

9.15 kB

	"""
	Ligand-Receptor Database Loader for HeartMAP
	Uses LIANA's curated resources (consensus, CellPhoneDB, Omnipath, etc.)
	"""

	import pandas as pd
	from typing import List, Tuple, Optional

	# Try importing LIANA
	# Optional dependency probe: LIANA_AVAILABLE records whether `liana` could
	# be imported; the result is announced on stdout either way.
	try:
	    import liana
	except ImportError:
	    LIANA_AVAILABLE = False
	    print("⚠ LIANA not available - will use fallback database")
	else:
	    LIANA_AVAILABLE = True
	    print(f"✓ LIANA v{liana.__version__} loaded successfully")

	class LigandReceptorDatabase:
	    """
	    Manage ligand-receptor (L-R) interaction databases.

	    On construction the pairs are loaded from LIANA when the module-level
	    ``LIANA_AVAILABLE`` flag is true, otherwise from a built-in
	    cardiac-focused fallback table. After loading, ``lr_pairs`` is a
	    DataFrame with the columns ``ligand``, ``receptor`` and ``confidence``.
	    """

	    def __init__(self, resource: str = 'consensus'):
	        """
	        Initialize L-R database

	        Parameters:
	        -----------
	        resource : str
	            Database to use:
	            - 'consensus': curated from multiple sources (recommended)
	            - 'cellphonedb': CellPhoneDB database
	            - 'omnipath': OmniPath database
	            - 'connectome': Ramilowski 2015
	            - 'cellinker': CellLinker database
	        """
	        self.resource = resource
	        self.lr_pairs = None
	        self.load_database()

	    def load_database(self):
	        """Load L-R database from LIANA or fallback"""
	        if LIANA_AVAILABLE:
	            self._load_from_liana()
	        else:
	            self._load_fallback_database()

	    def _load_from_liana(self):
	        """
	        Load curated L-R pairs from LIANA.

	        Any failure (import error, unknown resource name, unexpected table
	        layout) is reported on stdout and the fallback table is loaded
	        instead, so ``lr_pairs`` is always populated when this returns.
	        """
	        try:
	            from liana.resource import select_resource

	            lr_df = select_resource(self.resource)
	            pairs = self._standardize_liana_frame(lr_df)
	        except Exception as e:
	            # Deliberate best-effort: a broken LIANA install or resource must
	            # not prevent the caller from getting a usable pair list.
	            print(f" Error loading LIANA database: {e}")
	            self._load_fallback_database()
	            return

	        if pairs is None:
	            print(f" Unexpected LIANA format, using fallback")
	            self._load_fallback_database()
	            return

	        self.lr_pairs = pairs
	        print(f"✓ Loaded {len(self.lr_pairs)} L-R pairs from LIANA {self.resource}")

	    @staticmethod
	    def _standardize_liana_frame(lr_df: pd.DataFrame) -> Optional[pd.DataFrame]:
	        """
	        Reduce a raw resource table to unique ligand/receptor/confidence rows.

	        Parameters:
	        -----------
	        lr_df : pd.DataFrame
	            Table expected to contain 'ligand' and 'receptor' columns and,
	            optionally, a 'score' or 'confidence' column ('score' wins when
	            both are present).

	        Returns:
	        --------
	        pd.DataFrame with columns ligand, receptor, confidence (one row per
	        unique pair, keeping the first occurrence), or None when the
	        'ligand'/'receptor' columns are missing. The input is not modified.
	        """
	        key_cols = ['ligand', 'receptor']
	        if not all(col in lr_df.columns for col in key_cols):
	            return None

	        score_col = next(
	            (col for col in ('score', 'confidence') if col in lr_df.columns),
	            None,
	        )
	        if score_col is None:
	            # .copy() gives an independent frame, so adding the column never
	            # writes into (or warns about) a slice of the caller's table.
	            pairs = lr_df[key_cols].drop_duplicates().copy()
	            pairs['confidence'] = 1.0  # Default high confidence
	            return pairs

	        # Carry the score through de-duplication instead of re-attaching it
	        # afterwards via index alignment; rename() returns a fresh frame.
	        pairs = lr_df[key_cols + [score_col]].drop_duplicates(subset=key_cols)
	        return pairs.rename(columns={score_col: 'confidence'})

	    def _load_fallback_database(self):
	        """Fallback: built-in cardiac-focused L-R pairs with confidence scores"""

	        # Cardiac-relevant pairs (89 interactions) as
	        # (ligand, receptor, confidence) tuples
	        cardiac_lr_data = [
	            # Angiogenesis & Vascular
	            ('VEGFA', 'FLT1', 0.95), ('VEGFA', 'KDR', 0.98), ('VEGFA', 'NRP1', 0.85),
	            ('VEGFB', 'FLT1', 0.90), ('VEGFC', 'FLT4', 0.95),
	            ('ANGPT1', 'TEK', 0.92), ('ANGPT2', 'TEK', 0.88),
	            ('PGF', 'FLT1', 0.85),

	            # TGF-beta superfamily
	            ('TGFB1', 'TGFBR1', 0.98), ('TGFB1', 'TGFBR2', 0.98),
	            ('TGFB2', 'TGFBR1', 0.95), ('TGFB3', 'TGFBR2', 0.95),
	            ('BMP2', 'BMPR1A', 0.90), ('BMP2', 'BMPR2', 0.88),
	            ('BMP4', 'BMPR1A', 0.92), ('BMP7', 'BMPR2', 0.85),
	            ('INHBA', 'ACVR1B', 0.80),

	            # FGF signaling
	            ('FGF1', 'FGFR1', 0.90), ('FGF2', 'FGFR1', 0.95),
	            ('FGF2', 'FGFR2', 0.92), ('FGF7', 'FGFR2', 0.88),
	            ('FGF9', 'FGFR3', 0.85), ('FGF10', 'FGFR2', 0.87),

	            # PDGF signaling
	            ('PDGFA', 'PDGFRA', 0.98), ('PDGFB', 'PDGFRB', 0.98),
	            ('PDGFC', 'PDGFRA', 0.90), ('PDGFD', 'PDGFRB', 0.88),

	            # Inflammatory cytokines
	            ('IL6', 'IL6R', 0.98), ('IL1B', 'IL1R1', 0.95),
	            ('TNF', 'TNFRSF1A', 0.98), ('TNF', 'TNFRSF1B', 0.95),
	            ('IFNG', 'IFNGR1', 0.92), ('IL10', 'IL10RA', 0.90),
	            ('IL4', 'IL4R', 0.88), ('IL13', 'IL13RA1', 0.85),

	            # Chemokines
	            ('CXCL12', 'CXCR4', 0.98), ('CXCL12', 'CXCR7', 0.85),
	            ('CCL2', 'CCR2', 0.95), ('CCL5', 'CCR5', 0.92),
	            ('CXCL8', 'CXCR1', 0.90), ('CXCL8', 'CXCR2', 0.88),
	            ('CCL3', 'CCR1', 0.85), ('CCL4', 'CCR5', 0.87),

	            # Growth factors
	            ('EGF', 'EGFR', 0.98), ('HBEGF', 'EGFR', 0.92),
	            ('IGF1', 'IGF1R', 0.98), ('IGF2', 'IGF1R', 0.95),
	            ('HGF', 'MET', 0.95), ('NGF', 'NTRK1', 0.92),

	            # Notch signaling
	            ('DLL1', 'NOTCH1', 0.90), ('DLL4', 'NOTCH1', 0.92),
	            ('JAG1', 'NOTCH1', 0.88), ('JAG1', 'NOTCH2', 0.85),
	            ('JAG2', 'NOTCH3', 0.82),

	            # Wnt signaling
	            ('WNT3A', 'FZD1', 0.85), ('WNT3A', 'FZD2', 0.83),
	            ('WNT5A', 'FZD5', 0.88), ('WNT7A', 'FZD7', 0.85),

	            # Extracellular matrix
	            ('COL1A1', 'ITGA1', 0.90), ('COL1A1', 'ITGA2', 0.88),
	            ('FN1', 'ITGA5', 0.95), ('FN1', 'ITGB1', 0.92),
	            ('LAMB1', 'ITGA6', 0.88), ('THBS1', 'CD47', 0.85),

	            # Cardiac specific
	            ('NRG1', 'ERBB2', 0.95), ('NRG1', 'ERBB4', 0.92),
	            ('EDN1', 'EDNRA', 0.95), ('EDN1', 'EDNRB', 0.90),
	            ('NPPA', 'NPR1', 0.92), ('NPPB', 'NPR1', 0.90),

	            # Semaphorins
	            ('SEMA3A', 'NRP1', 0.88), ('SEMA3C', 'NRP2', 0.85),
	            ('SEMA4D', 'PLXNB1', 0.82),

	            # Ephrins
	            ('EFNA1', 'EPHA2', 0.90), ('EFNB2', 'EPHB4', 0.92),

	            # Complement
	            ('C3', 'C3AR1', 0.88), ('C5', 'C5AR1', 0.90),

	            # Adhesion
	            ('ICAM1', 'ITGAL', 0.92), ('VCAM1', 'ITGA4', 0.90),
	            ('CD34', 'SELP', 0.85), ('PECAM1', 'PECAM1', 0.88),

	            # Apoptosis
	            ('FASLG', 'FAS', 0.95), ('TNFSF10', 'TNFRSF10A', 0.90),

	            # Neuropeptides
	            ('BDNF', 'NTRK2', 0.88), ('NTF3', 'NTRK3', 0.85),

	            # Metabolic
	            ('LEP', 'LEPR', 0.92), ('ADIPOQ', 'ADIPOR1', 0.88),
	            ('INS', 'INSR', 0.98), ('GCG', 'GCGR', 0.90),
	        ]

	        self.lr_pairs = pd.DataFrame(
	            cardiac_lr_data,
	            columns=['ligand', 'receptor', 'confidence']
	        )

	        print(f"✓ Loaded {len(self.lr_pairs)} L-R pairs from fallback cardiac database")

	    def get_pairs(self, confidence_threshold: float = 0.0, present_in_data: Optional[List[str]] = None) -> List[Tuple[str, str]]:
	        """
	        Get L-R pairs as list of tuples

	        Parameters:
	        -----------
	        confidence_threshold : float
	            Minimum confidence score (0-1); pairs with confidence greater
	            than or equal to this value are kept
	        present_in_data : list of str, optional
	            Gene names present in dataset. When given, a pair is kept only
	            if both its ligand and its receptor appear in this list.

	        Returns:
	        --------
	        list of tuples: [(ligand, receptor), ...]
	        """
	        # Single source of truth for the confidence filter.
	        filtered = self.get_dataframe(confidence_threshold)

	        # Filter by gene availability
	        if present_in_data is not None:
	            present_set = set(present_in_data)
	            both_present = (
	                filtered['ligand'].isin(present_set)
	                & filtered['receptor'].isin(present_set)
	            )
	            filtered = filtered[both_present]

	        return list(zip(filtered['ligand'], filtered['receptor']))

	    def get_dataframe(self, confidence_threshold: float = 0.0) -> pd.DataFrame:
	        """Get a copy of the L-R pairs with confidence >= confidence_threshold"""
	        return self.lr_pairs[self.lr_pairs['confidence'] >= confidence_threshold].copy()

	    def save_to_csv(self, filepath: str):
	        """Save database to CSV (without the index column)"""
	        self.lr_pairs.to_csv(filepath, index=False)
	        print(f"✓ Saved L-R database to {filepath}")


	# Convenience function
	def get_ligand_receptor_pairs(adata, resource: str = 'consensus', confidence_threshold: float = 0.7) -> List[Tuple[str, str]]:
	    """
	    Build an L-R database and return the pairs whose genes occur in adata

	    Parameters:
	    -----------
	    adata : AnnData
	        Annotated data object; its ``var_names`` supply the gene names
	    resource : str
	        Database to use ('consensus', 'cellphonedb', etc.)
	    confidence_threshold : float
	        Minimum confidence (0-1)

	    Returns:
	    --------
	    list of tuples: [(ligand, receptor), ...]
	    """
	    database = LigandReceptorDatabase(resource=resource)

	    # Restrict to pairs where both partners are among the dataset's genes.
	    gene_names = adata.var_names.tolist()
	    matched = database.get_pairs(
	        confidence_threshold=confidence_threshold,
	        present_in_data=gene_names,
	    )

	    n_total = len(database.lr_pairs)
	    print(f" Found {len(matched)} L-R pairs present in dataset (from {n_total} total)")
	    return matched


	if __name__ == "__main__":
	    # Manual smoke test: load the database, show a sample, filter by
	    # confidence and export the table to a CSV in the working directory.
	    print("Testing L-R Database Loader...")
	    print("=" * 60)

	    # Uses LIANA if available, otherwise the built-in fallback table
	    db = LigandReceptorDatabase(resource='consensus')
	    print(f"\nTotal pairs: {len(db.lr_pairs)}")
	    print("\nSample pairs:")
	    print(db.lr_pairs.head(10))

	    # Test filtering; get_pairs keeps confidence >= threshold, so the label
	    # says ">=" rather than ">".
	    high_conf_pairs = db.get_pairs(confidence_threshold=0.9)
	    print(f"\nHigh confidence pairs (>=0.9): {len(high_conf_pairs)}")

	    # Save example
	    db.save_to_csv("lr_database_export.csv")