|
|
| """
|
| Generate network data JSON files from GRAIL-Heart analysis outputs.
|
| These JSON files are then loaded by the interactive network explorer.
|
| """
|
|
|
| import json
|
| import pandas as pd
|
| from pathlib import Path
|
| from collections import defaultdict
|
|
|
| def load_lr_scores(analysis_dir: Path) -> dict:
|
| """Load L-R scores from all regions."""
|
| regions = ['AX', 'LA', 'LV', 'RA', 'RV', 'SP']
|
| all_data = {}
|
|
|
| for region in regions:
|
| lr_file = analysis_dir / 'tables' / f'{region}_lr_scores.csv'
|
| if lr_file.exists():
|
| df = pd.read_csv(lr_file)
|
| all_data[region] = df
|
| print(f"Loaded {len(df)} L-R pairs for {region}")
|
| else:
|
| print(f"Warning: {lr_file} not found")
|
|
|
| return all_data
|
|
|
| def build_network_data(df: pd.DataFrame, top_n: int = None, min_score: float = 0.0) -> dict:
|
| """Build network nodes and edges from L-R score dataframe.
|
|
|
| Args:
|
| df: DataFrame with L-R pairs
|
| top_n: If set, only include top N edges by score. None = include all.
|
| min_score: Minimum score threshold
|
| """
|
|
|
|
|
| if 'ligand' in df.columns:
|
| source_col, target_col = 'ligand', 'receptor'
|
| elif 'source' in df.columns:
|
| source_col, target_col = 'source', 'target'
|
| else:
|
| raise ValueError(f"Unknown columns: {df.columns.tolist()}")
|
|
|
|
|
| score_col = None
|
| for col in ['causal_score', 'score', 'weight', 'mean_score']:
|
| if col in df.columns:
|
| score_col = col
|
| break
|
|
|
| if score_col is None:
|
|
|
| numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
|
| if len(numeric_cols) > 0:
|
| score_col = numeric_cols[0]
|
| else:
|
| df['score'] = 1.0
|
| score_col = 'score'
|
|
|
|
|
| df_filtered = df[df[score_col] >= min_score].copy()
|
| df_filtered = df_filtered.sort_values(score_col, ascending=False)
|
|
|
|
|
| if top_n is not None:
|
| df_filtered = df_filtered.head(top_n)
|
|
|
|
|
| node_info = defaultdict(lambda: {'type': set(), 'degree': 0})
|
|
|
| for _, row in df_filtered.iterrows():
|
| source = row[source_col]
|
| target = row[target_col]
|
|
|
| node_info[source]['type'].add('ligand')
|
| node_info[source]['degree'] += 1
|
|
|
| node_info[target]['type'].add('receptor')
|
| node_info[target]['degree'] += 1
|
|
|
|
|
| nodes = []
|
| for gene, info in node_info.items():
|
| if 'ligand' in info['type'] and 'receptor' in info['type']:
|
| node_type = 'dual'
|
| elif 'ligand' in info['type']:
|
| node_type = 'ligand'
|
| else:
|
| node_type = 'receptor'
|
|
|
| nodes.append({
|
| 'id': gene,
|
| 'type': node_type,
|
| 'degree': info['degree']
|
| })
|
|
|
|
|
| edges = []
|
| for _, row in df_filtered.iterrows():
|
| edges.append({
|
| 'source': row[source_col],
|
| 'target': row[target_col],
|
| 'weight': round(float(row[score_col]), 3)
|
| })
|
|
|
| return {
|
| 'nodes': nodes,
|
| 'edges': edges
|
| }
|
|
|
| def build_integrated_network(all_region_data: dict, top_n: int = None) -> dict:
|
| """Build integrated network from all regions.
|
|
|
| Args:
|
| all_region_data: Dict of region -> DataFrame
|
| top_n: If set, only include top N edges. None = include all.
|
| """
|
|
|
|
|
| combined_edges = []
|
|
|
| for region, df in all_region_data.items():
|
|
|
| if 'ligand' in df.columns:
|
| source_col, target_col = 'ligand', 'receptor'
|
| elif 'source' in df.columns:
|
| source_col, target_col = 'source', 'target'
|
| else:
|
| continue
|
|
|
|
|
| score_col = None
|
| for col in ['causal_score', 'score', 'weight', 'mean_score']:
|
| if col in df.columns:
|
| score_col = col
|
| break
|
|
|
| if score_col is None:
|
| continue
|
|
|
| for _, row in df.iterrows():
|
| combined_edges.append({
|
| 'source': row[source_col],
|
| 'target': row[target_col],
|
| 'weight': float(row[score_col]),
|
| 'region': region
|
| })
|
|
|
|
|
| combined_edges.sort(key=lambda x: x['weight'], reverse=True)
|
| if top_n is not None:
|
| top_edges = combined_edges[:top_n]
|
| else:
|
| top_edges = combined_edges
|
|
|
|
|
| node_info = defaultdict(lambda: {'type': set(), 'degree': 0, 'regions': set()})
|
|
|
| for edge in top_edges:
|
| node_info[edge['source']]['type'].add('ligand')
|
| node_info[edge['source']]['degree'] += 1
|
| node_info[edge['source']]['regions'].add(edge['region'])
|
|
|
| node_info[edge['target']]['type'].add('receptor')
|
| node_info[edge['target']]['degree'] += 1
|
| node_info[edge['target']]['regions'].add(edge['region'])
|
|
|
| nodes = []
|
| for gene, info in node_info.items():
|
| if 'ligand' in info['type'] and 'receptor' in info['type']:
|
| node_type = 'dual'
|
| elif 'ligand' in info['type']:
|
| node_type = 'ligand'
|
| else:
|
| node_type = 'receptor'
|
|
|
| nodes.append({
|
| 'id': gene,
|
| 'type': node_type,
|
| 'degree': info['degree'],
|
| 'regions': list(info['regions'])
|
| })
|
|
|
| edges = [{
|
| 'source': e['source'],
|
| 'target': e['target'],
|
| 'weight': round(e['weight'], 3),
|
| 'region': e['region']
|
| } for e in top_edges]
|
|
|
| return {
|
| 'nodes': nodes,
|
| 'edges': edges
|
| }
|
|
|
| def main():
|
|
|
| project_dir = Path(__file__).parent.parent
|
| analysis_dir = project_dir / 'outputs' / 'enhanced_analysis'
|
| output_dir = project_dir / 'outputs' / 'cytoscape' / 'data'
|
|
|
|
|
| output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
| print("Loading L-R scores from analysis outputs...")
|
| all_data = load_lr_scores(analysis_dir)
|
|
|
| if not all_data:
|
| print("No data found! Check paths.")
|
| return
|
|
|
|
|
| all_networks = {}
|
|
|
| for region, df in all_data.items():
|
| print(f"\nProcessing {region}...")
|
|
|
| network = build_network_data(df, top_n=None, min_score=0.0)
|
| all_networks[region] = network
|
|
|
|
|
| region_file = output_dir / f'{region}_network.json'
|
| with open(region_file, 'w') as f:
|
| json.dump(network, f, indent=2)
|
| print(f" Saved {region_file.name}: {len(network['nodes'])} nodes, {len(network['edges'])} edges")
|
|
|
|
|
| print("\nBuilding integrated network...")
|
| integrated = build_integrated_network(all_data, top_n=None)
|
| all_networks['integrated'] = integrated
|
|
|
| integrated_file = output_dir / 'integrated_network.json'
|
| with open(integrated_file, 'w') as f:
|
| json.dump(integrated, f, indent=2)
|
| print(f"Saved {integrated_file.name}: {len(integrated['nodes'])} nodes, {len(integrated['edges'])} edges")
|
|
|
|
|
| combined_file = output_dir / 'all_networks.json'
|
| with open(combined_file, 'w') as f:
|
| json.dump(all_networks, f)
|
| print(f"\nSaved combined data to {combined_file.name}")
|
|
|
|
|
| metadata = {
|
| 'integrated': {'name': 'Integrated Network', 'cells': 42654, 'description': 'Combined L-R interactions across all cardiac regions'},
|
| 'AX': {'name': 'Apex', 'cells': 6497, 'description': 'Cardiac apex - tip of the heart'},
|
| 'LA': {'name': 'Left Atrium', 'cells': 5822, 'description': 'Upper left chamber receiving oxygenated blood'},
|
| 'LV': {'name': 'Left Ventricle', 'cells': 9626, 'description': 'Main pumping chamber to systemic circulation'},
|
| 'RA': {'name': 'Right Atrium', 'cells': 7027, 'description': 'Upper right chamber receiving deoxygenated blood'},
|
| 'RV': {'name': 'Right Ventricle', 'cells': 5039, 'description': 'Pumping chamber to pulmonary circulation'},
|
| 'SP': {'name': 'Septum', 'cells': 8643, 'description': 'Muscular wall separating left and right chambers'}
|
| }
|
|
|
| metadata_file = output_dir / 'metadata.json'
|
| with open(metadata_file, 'w') as f:
|
| json.dump(metadata, f, indent=2)
|
|
|
| print(f"\nAll network data generated in {output_dir}")
|
| print("\nTo update the explorer, run:")
|
| print(" python scripts/update_explorer.py")
|
|
|
| if __name__ == '__main__':
|
| main()
|
|
|