# Spatial Transcriptomics Pipeline Templates

## 1. Quality Control Workflow

```nextflow
#!/usr/bin/env nextflow
nextflow.enable.dsl=2

// Pipeline parameters (override on the command line or in nextflow.config).
params.input_pattern      = "*.h5ad"      // glob selecting the input AnnData files
params.output_dir         = "./results"   // root directory for published outputs
params.min_genes_per_cell = 200           // cells/spots with fewer detected genes are dropped
params.min_cells_per_gene = 3             // genes detected in fewer cells/spots are dropped

/*
 * SPATIAL_QC
 *
 * Computes scanpy QC metrics for one AnnData file, filters low-quality
 * cells/spots and rarely detected genes, and writes the filtered object
 * together with a small JSON summary.
 *
 * Input:
 *   sample_id    - identifier used for the task tag and output file names
 *   spatial_data - path to the input .h5ad file
 *
 * Output:
 *   filtered_data - tuple (sample_id, <sample_id>_qc.h5ad)
 *   metrics       - <sample_id>_metrics.json with cell/gene counts before
 *                   and after filtering
 */
process SPATIAL_QC {
    tag "$sample_id"
    label 'process_medium'
    container 'quay.io/biocontainers/scanpy:1.9.1--pyhd8ed1ab_0'
    publishDir "${params.output_dir}/qc", mode: 'copy'

    input:
    tuple val(sample_id), path(spatial_data)

    output:
    tuple val(sample_id), path("${sample_id}_qc.h5ad"), emit: filtered_data
    path "${sample_id}_metrics.json", emit: metrics

    script:
    """
    #!/usr/bin/env python
    import json

    import scanpy as sc

    adata = sc.read_h5ad('${spatial_data}')

    # Record the unfiltered size so the report shows what filtering removed.
    n_cells_raw = int(adata.n_obs)
    n_genes_raw = int(adata.n_vars)

    # Flag mitochondrial genes. The match is case-insensitive so that both
    # 'MT-' and 'mt-' prefixed gene names are recognised.
    adata.var['mt'] = adata.var_names.str.upper().str.startswith('MT-')

    # qc_vars must name the flag column, otherwise no mitochondrial metrics
    # (e.g. pct_counts_mt) are computed.
    sc.pp.calculate_qc_metrics(
        adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True
    )

    # Filter cells and genes
    sc.pp.filter_cells(adata, min_genes=${params.min_genes_per_cell})
    sc.pp.filter_genes(adata, min_cells=${params.min_cells_per_gene})

    adata.write('${sample_id}_qc.h5ad')

    metrics = {
        'sample_id': '${sample_id}',
        'n_cells': int(adata.n_obs),
        'n_genes': int(adata.n_vars),
        'n_cells_raw': n_cells_raw,
        'n_genes_raw': n_genes_raw
    }

    with open('${sample_id}_metrics.json', 'w') as f:
        json.dump(metrics, f, indent=2)
    """
}

// Entry point: one SPATIAL_QC task per file matching params.input_pattern.
workflow {
    input_ch = Channel
        .fromPath(params.input_pattern)
        // Fail loudly instead of finishing "successfully" with nothing to do.
        .ifEmpty { error "No input files matched params.input_pattern: ${params.input_pattern}" }
        // The sample id is the file name without its final extension.
        .map { h5ad -> [h5ad.baseName, h5ad] }

    SPATIAL_QC(input_ch)
}
```

## 2.
Spatial Decomposition Pipeline

```nextflow
/*
 * SPATIAL_DECOMPOSITION
 *
 * Estimates per-spot cell-type proportions for a spatial dataset using a
 * reference dataset annotated in obs['cell_type'].
 *
 * NOTE: the decomposition itself is a PLACEHOLDER that draws seeded random
 * Dirichlet proportions. Replace it with a real method before using the
 * results for anything.
 *
 * Input:
 *   sample_id      - identifier used for the task tag and output file names
 *   spatial_data   - path to the spatial .h5ad file
 *   reference_data - path to the reference .h5ad file with obs['cell_type']
 *
 * Output:
 *   results     - tuple (sample_id, <sample_id>_decomposition.h5ad) with one
 *                 prop_<cell_type> column per cell type added to obs
 *   proportions - <sample_id>_proportions.csv (spots x cell types)
 */
process SPATIAL_DECOMPOSITION {
    tag "$sample_id"
    label 'process_high'
    // NOTE(review): ':latest' is not reproducible - pin a version tag or digest.
    container 'openproblems/spatial-decomposition:latest'
    // Publish results the same way the QC step does.
    publishDir "${params.output_dir}/decomposition", mode: 'copy'

    input:
    tuple val(sample_id), path(spatial_data), path(reference_data)

    output:
    tuple val(sample_id), path("${sample_id}_decomposition.h5ad"), emit: results
    path "${sample_id}_proportions.csv", emit: proportions

    script:
    """
    #!/usr/bin/env python
    import anndata as ad
    import numpy as np
    import pandas as pd

    # Load data
    adata_spatial = ad.read_h5ad('${spatial_data}')
    adata_reference = ad.read_h5ad('${reference_data}')

    if 'cell_type' not in adata_reference.obs.columns:
        raise KeyError('reference data is missing the obs column cell_type')

    # Restrict both datasets to the genes they share
    common_genes = adata_spatial.var_names.intersection(adata_reference.var_names)
    if len(common_genes) == 0:
        raise ValueError('spatial and reference data share no gene names')
    adata_spatial = adata_spatial[:, common_genes].copy()
    adata_reference = adata_reference[:, common_genes].copy()

    # Cell-type labels: drop missing values and sort for a stable column order
    cell_types = sorted(adata_reference.obs['cell_type'].dropna().astype(str).unique())
    if not cell_types:
        raise ValueError('reference data contains no cell_type labels')

    # Placeholder decomposition (replace with actual method).
    # A fixed seed keeps reruns and -resume reproducible.
    rng = np.random.default_rng(0)
    proportions_matrix = rng.dirichlet(
        np.ones(len(cell_types)), size=adata_spatial.n_obs
    )

    # Create proportions DataFrame (rows: spots, columns: cell types)
    proportions_df = pd.DataFrame(
        proportions_matrix,
        columns=cell_types,
        index=adata_spatial.obs_names
    )
    proportions_df.to_csv('${sample_id}_proportions.csv')

    # Add proportions to spatial data
    for cell_type in cell_types:
        adata_spatial.obs[f'prop_{cell_type}'] = proportions_df[cell_type].values

    adata_spatial.write('${sample_id}_decomposition.h5ad')
    """
}
```

## 3. Configuration Template

```nextflow
// nextflow.config

// Default pipeline parameters.
params {
    input_dir      = './data'
    output_dir     = './results'
    reference_data = './reference/atlas.h5ad'
}

// Resource requests, selected by the label each process declares.
process {
    withLabel: 'process_medium' {
        cpus   = 4
        memory = 8.GB
        time   = 2.h
    }

    withLabel: 'process_high' {
        cpus   = 8
        memory = 16.GB
        time   = 4.h
    }
}

docker {
    enabled = true
    // Single-quoted so that \$(id -u) / \$(id -g) are left for the shell that
    // launches the container; output files are then owned by the calling user.
    runOptions = '-u $(id -u):$(id -g)'
}
```

This provides:

1. **Production-ready QC pipeline** with filtering and reporting
2. **Spatial decomposition workflow** (placeholder method) that writes per-spot cell-type proportions
3.
**Flexible configuration** for different environments
4. **Resource limits** (CPUs, memory, time) set per process label