grail-heart / evaluate_test.py

Upload folder using huggingface_hub

9eba44b verified 4 months ago

6.97 kB

	"""Evaluate the trained GRAIL-Heart model on the test set."""

	import torch
	import yaml
	from pathlib import Path
	from src.grail_heart.training.trainer import GRAILHeartTrainer
	from src.grail_heart.models.grail_heart import GRAILHeart
	from src.grail_heart.data.datasets import SpatialTranscriptomicsDataset
	from src.grail_heart.data.graph_builder import SpatialGraphBuilder
	from src.grail_heart.data.cellchat_database import get_omnipath_lr_database
	from torch_geometric.loader import DataLoader


	def _load_datasets(h5ad_files, data_config):
	    """Load each ``.h5ad`` file and keep the datasets flagged ``has_spatial``.

	    Args:
	        h5ad_files: Iterable of ``Path`` objects pointing at ``.h5ad`` files.
	        data_config: The ``data`` section of the training config; supplies
	            the preprocessing options passed to the dataset constructor.

	    Returns:
	        List of ``SpatialTranscriptomicsDataset`` objects, in input order.
	    """
	    datasets = []
	    for h5ad_path in h5ad_files:
	        # Deliberately best-effort: a file that fails to load is reported
	        # and skipped so the remaining samples can still be evaluated.
	        try:
	            print(f"Loading {h5ad_path.name}...")
	            ds = SpatialTranscriptomicsDataset(
	                data_path=h5ad_path,
	                n_top_genes=data_config['n_top_genes'],
	                normalize=data_config['normalize'],
	                log_transform=data_config['log_transform'],
	                min_cells=data_config['min_cells'],
	                min_genes=data_config['min_genes'],
	            )
	            if ds.has_spatial:
	                datasets.append(ds)
	                print(f" Loaded: {ds.n_cells} cells, {ds.n_genes} genes")
	        except Exception as e:
	            print(f" Failed to load: {e}")
	    return datasets


	def _label_lr_edges(graph, ds, lr_pairs, expression_threshold=0.0):
	    """Mark graph edges that are supported by a ligand-receptor pair.

	    An edge gets label 1 when, for at least one (ligand, receptor) row of
	    ``lr_pairs`` whose genes are both present in ``ds.gene_names``, the
	    source node expresses the ligand and the destination node expresses the
	    receptor above ``expression_threshold``. All other edges get label 0.

	    Args:
	        graph: Graph object exposing ``edge_index`` (row 0 = source nodes,
	            row 1 = destination nodes).
	        ds: Dataset exposing ``gene_names`` and ``expression``
	            (indexed as ``expression[:, gene_idx]``).
	        lr_pairs: Table with ``ligand`` and ``receptor`` columns.
	        expression_threshold: Strict lower bound for "expressed".

	    Returns:
	        ``torch.long`` tensor with one entry per edge.
	    """
	    edge_type = torch.zeros(graph.edge_index.shape[1], dtype=torch.long)
	    gene_to_idx = {g: idx for idx, g in enumerate(ds.gene_names)}

	    # Loop invariants: fetch once instead of once per L-R pair.
	    expression = ds.expression
	    src_nodes = graph.edge_index[0]
	    dst_nodes = graph.edge_index[1]

	    # Iterating the two columns directly avoids building a Series per row,
	    # which is what DataFrame.iterrows() does.
	    for ligand, receptor in zip(lr_pairs['ligand'], lr_pairs['receptor']):
	        lig_idx = gene_to_idx.get(ligand)
	        rec_idx = gene_to_idx.get(receptor)
	        if lig_idx is None or rec_idx is None:
	            continue

	        src_has_ligand = expression[:, lig_idx][src_nodes] > expression_threshold
	        dst_has_receptor = expression[:, rec_idx][dst_nodes] > expression_threshold
	        edge_type[src_has_ligand & dst_has_receptor] = 1

	    return edge_type


	def _build_graphs(datasets, data_config, lr_pairs):
	    """Build one spatial graph per dataset and attach L-R edge labels.

	    The builder settings and edge labelling are taken from the training
	    config so the graphs match what the model saw during training.

	    Returns:
	        List of graphs (same order as ``datasets``), each with an
	        ``edge_type`` attribute set.
	    """
	    graph_builder = SpatialGraphBuilder(
	        method=data_config['graph_method'],
	        k=data_config['k_neighbors'],
	    )

	    graphs = []
	    total = len(datasets)
	    for position, ds in enumerate(datasets, start=1):
	        print(f"Building graph {position}/{total}...")

	        graph = graph_builder.build_graph(
	            expression=ds.expression,
	            spatial_coords=ds.spatial_coords,
	            cell_types=ds.cell_types,
	        )

	        edge_type = _label_lr_edges(graph, ds, lr_pairs)
	        graph.edge_type = edge_type
	        n_lr_edges = (edge_type == 1).sum().item()

	        graphs.append(graph)
	        print(f" Nodes: {graph.num_nodes}, Edges: {graph.num_edges}, L-R edges: {n_lr_edges}")
	    return graphs


	def _collect_reconstructions(model, loader, device):
	    """Run the model over ``loader`` and gather reconstructions and inputs.

	    Returns:
	        Tuple ``(predictions, targets)`` of CPU tensors concatenated along
	        dimension 0 across all batches.
	    """
	    predictions = []
	    targets = []
	    model.eval()
	    with torch.no_grad():
	        for batch in loader:
	            batch = batch.to(device)
	            outputs = model(batch)
	            predictions.append(outputs['reconstruction'].cpu())
	            targets.append(batch.x.cpu())
	    return torch.cat(predictions, dim=0), torch.cat(targets, dim=0)


	def main():
	    """Evaluate the best checkpoint on the held-out test graph.

	    Steps: read ``outputs/config.yaml``, load every ``.h5ad`` sample under
	    ``data/HeartCellAtlasv2``, rebuild the graphs with L-R edge labels,
	    restore ``outputs/checkpoints/best.pt``, run the model on the last
	    graph, print the reconstruction metrics and write them to
	    ``outputs/test_metrics.yaml``.

	    Raises:
	        RuntimeError: If no usable dataset could be loaded, or if no
	            dataset reports a cell-type count.
	    """
	    # Load config
	    with open('outputs/config.yaml', 'r') as config_file:
	        config = yaml.safe_load(config_file)

	    data_config = config['data']
	    model_config = config['model']

	    # Data directory
	    data_dir = Path('data/HeartCellAtlasv2')
	    h5ad_files = sorted(data_dir.glob('*.h5ad'))

	    # Load L-R database from OmniPath (CellPhoneDB + CellChat + more)
	    cache_path = Path('data/lr_database_cache.csv')
	    lr_pairs = get_omnipath_lr_database(cache_path=cache_path)
	    print(f"Loaded L-R database with {len(lr_pairs)} pairs from OmniPath")

	    # Load datasets (no limit - use all files)
	    datasets = _load_datasets(h5ad_files, data_config)
	    if not datasets:
	        # Fail with a clear message instead of an IndexError further down.
	        raise RuntimeError(
	            f"No spatial datasets could be loaded from {data_dir}; nothing to evaluate"
	        )

	    # Build graphs with proper L-R edge labeling (must match training)
	    print("\nBuilding graphs...")
	    graphs = _build_graphs(datasets, data_config, lr_pairs)

	    # Get dimensions from first graph
	    n_genes = graphs[0].x.shape[1]

	    # Compute max cell types across all datasets (must match training)
	    cell_type_counts = [
	        ds.n_cell_types for ds in datasets if ds.n_cell_types is not None
	    ]
	    if not cell_type_counts:
	        raise RuntimeError(
	            "None of the loaded datasets reports n_cell_types; cannot size the model"
	        )
	    n_cell_types = max(cell_type_counts)

	    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	    # Load checkpoint first to get architecture info.
	    # SECURITY NOTE: weights_only=False unpickles arbitrary Python objects;
	    # only use this with checkpoints produced by a trusted training run.
	    checkpoint = torch.load(
	        'outputs/checkpoints/best.pt', map_location=device, weights_only=False
	    )

	    # Create model with matching architecture
	    model = GRAILHeart(
	        n_genes=n_genes,
	        n_cell_types=n_cell_types,
	        hidden_dim=model_config['hidden_dim'],
	        n_gat_layers=model_config['n_gat_layers'],
	        n_heads=model_config['n_heads'],
	        n_edge_types=model_config.get('n_edge_types', 2),
	        encoder_dims=model_config.get('encoder_dims', [512, 256]),
	        dropout=model_config['dropout'],
	        use_spatial=model_config.get('use_spatial', True),
	        use_variational=model_config.get('use_variational', False),
	        tasks=model_config.get('tasks', ['lr', 'reconstruction']),
	        n_lr_pairs=len(lr_pairs),  # Must match training
	    )
	    model = model.to(device)

	    # Load checkpoint weights
	    model.load_state_dict(checkpoint['model_state_dict'])
	    print(f"\nLoaded best checkpoint from epoch {checkpoint['epoch']}")
	    best_val_loss = checkpoint['best_val_loss']
	    if best_val_loss != float('inf'):
	        print(f"Best validation loss: {best_val_loss:.4f}")
	    else:
	        print("Best validation loss: (not tracked)")

	    # Prepare test loader: the last graph (files are sorted by name) is the
	    # test sample. NOTE(review): presumably mirrors the training split - confirm.
	    test_graphs = [graphs[-1]]
	    test_loader = DataLoader(test_graphs, batch_size=1, shuffle=False)

	    # Evaluate on test set
	    print('\n=== Test Set Evaluation ===')

	    from src.grail_heart.training.metrics import compute_reconstruction_metrics

	    all_preds, all_targets = _collect_reconstructions(model, test_loader, device)

	    # Compute reconstruction metrics (expects tensors)
	    test_metrics = compute_reconstruction_metrics(all_preds, all_targets)

	    for k, v in test_metrics.items():
	        print(f" test_{k}: {v:.4f}")

	    # Save test metrics
	    with open('outputs/test_metrics.yaml', 'w') as metrics_file:
	        yaml.dump(
	            {f'test_{k}': float(v) for k, v in test_metrics.items()},
	            metrics_file,
	            default_flow_style=False,
	        )
	    print('\nTest metrics saved to outputs/test_metrics.yaml')


	# Script entry point: run the evaluation only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == '__main__':
	    main()