Spaces:
Sleeping
Sleeping
A newer version of the Gradio SDK is available:
6.9.0
Spatial Transcriptomics Pipeline Templates
1. Quality Control Workflow
#!/usr/bin/env nextflow
// Opt in to DSL2 syntax (processes are invoked from a `workflow` block).
nextflow.enable.dsl=2
// Default pipeline parameters.
// input_pattern: glob passed to Channel.fromPath in the workflow block.
params.input_pattern = "*.h5ad"
// output_dir: root directory under which SPATIAL_QC publishes its `qc` folder.
params.output_dir = "./results"
// min_genes_per_cell: forwarded to scanpy's filter_cells(min_genes=...).
params.min_genes_per_cell = 200
// SPATIAL_QC: per-sample quality control of a spatial AnnData (.h5ad) file.
//
// Input : tuple (sample_id, spatial_data) where spatial_data is an .h5ad file.
// Output: filtered_data -> tuple (sample_id, <sample_id>_qc.h5ad)
//         metrics       -> <sample_id>_metrics.json (sample id, cells and genes kept)
// Both outputs are copied to <params.output_dir>/qc.
process SPATIAL_QC {
    tag "$sample_id"
    label 'process_medium'
    container 'quay.io/biocontainers/scanpy:1.9.1--pyhd8ed1ab_0'
    publishDir "${params.output_dir}/qc", mode: 'copy'

    input:
    tuple val(sample_id), path(spatial_data)

    output:
    tuple val(sample_id), path("${sample_id}_qc.h5ad"), emit: filtered_data
    path "${sample_id}_metrics.json", emit: metrics

    script:
    """
    #!/usr/bin/env python
    import json

    import scanpy as sc

    adata = sc.read_h5ad('${spatial_data}')

    # QC metrics. The 'mt' flag must be listed in qc_vars, otherwise scanpy
    # ignores it and no mitochondrial metrics are computed.
    adata.var['mt'] = adata.var_names.str.startswith('MT-')
    sc.pp.calculate_qc_metrics(
        adata,
        qc_vars=['mt'],
        percent_top=None,
        log1p=False,
        inplace=True,
    )

    # Filter cells and genes
    sc.pp.filter_cells(adata, min_genes=${params.min_genes_per_cell})
    sc.pp.filter_genes(adata, min_cells=3)

    adata.write('${sample_id}_qc.h5ad')

    # Summary of what survived filtering
    metrics = {
        'sample_id': '${sample_id}',
        'n_cells': int(adata.n_obs),
        'n_genes': int(adata.n_vars),
    }
    with open('${sample_id}_metrics.json', 'w') as f:
        json.dump(metrics, f, indent=2)
    """
}
// Entry workflow: discover input files, key each one by its base name
// (file name without the final extension), and run QC on every sample.
workflow {
    samples_ch = Channel
        .fromPath(params.input_pattern)
        .map { h5ad ->
            [h5ad.baseName, h5ad]
        }

    SPATIAL_QC(samples_ch)
}
2. Spatial Decomposition Pipeline
// SPATIAL_DECOMPOSITION: estimate per-spot cell-type proportions for a spatial
// sample using a reference AnnData that carries a 'cell_type' column in .obs.
//
// Input : tuple (sample_id, spatial_data, reference_data), both .h5ad files.
// Output: results     -> tuple (sample_id, <sample_id>_decomposition.h5ad)
//         proportions -> <sample_id>_proportions.csv (spots x cell types)
//
// NOTE: the decomposition itself is a placeholder that draws proportions from
// a Dirichlet distribution; replace it with an actual method before use.
process SPATIAL_DECOMPOSITION {
    tag "$sample_id"
    label 'process_high'
    container 'openproblems/spatial-decomposition:latest'

    input:
    tuple val(sample_id), path(spatial_data), path(reference_data)

    output:
    tuple val(sample_id), path("${sample_id}_decomposition.h5ad"), emit: results
    path "${sample_id}_proportions.csv", emit: proportions

    script:
    """
    #!/usr/bin/env python
    import anndata as ad
    import numpy as np
    import pandas as pd

    # Load data
    adata_spatial = ad.read_h5ad('${spatial_data}')
    adata_reference = ad.read_h5ad('${reference_data}')

    # Restrict both datasets to the genes they share
    common_genes = adata_spatial.var_names.intersection(adata_reference.var_names)
    if common_genes.empty:
        # Fail loudly instead of writing an empty, meaningless result.
        raise ValueError(
            'No genes are shared between the spatial and reference data'
        )
    adata_spatial = adata_spatial[:, common_genes].copy()
    adata_reference = adata_reference[:, common_genes].copy()

    # Get cell types
    cell_types = adata_reference.obs['cell_type'].unique()

    # Placeholder decomposition (replace with actual method).
    # A fixed seed keeps the placeholder output identical across re-runs.
    n_spots = adata_spatial.n_obs
    n_cell_types = len(cell_types)
    rng = np.random.default_rng(0)
    proportions_matrix = rng.dirichlet(np.ones(n_cell_types), size=n_spots)

    # Create proportions DataFrame
    proportions_df = pd.DataFrame(
        proportions_matrix,
        columns=cell_types,
        index=adata_spatial.obs_names,
    )
    proportions_df.to_csv('${sample_id}_proportions.csv')

    # Add proportions to spatial data, one obs column per cell type
    for cell_type in cell_types:
        adata_spatial.obs[f'prop_{cell_type}'] = proportions_df[cell_type].values

    adata_spatial.write('${sample_id}_decomposition.h5ad')
    """
}
3. Configuration Template
// nextflow.config
// Pipeline parameters: input location, output root, and reference atlas path.
params.input_dir      = './data'
params.output_dir     = './results'
params.reference_data = './reference/atlas.h5ad'
// Resource requests, selected by the `label` each process declares
// ('process_medium' on SPATIAL_QC, 'process_high' on SPATIAL_DECOMPOSITION).
// Memory and time use Nextflow's unit literals (8.GB, 2.h) rather than
// strings that mix the literal and string notations.
process {
    withLabel: 'process_medium' {
        cpus   = 4
        memory = 8.GB
        time   = 2.h
    }
    withLabel: 'process_high' {
        cpus   = 8
        memory = 16.GB
        time   = 4.h
    }
}
// Run every task in Docker. The -u option passes the invoking user's uid:gid
// to the container; the single quotes keep the command substitution literal
// so the shell, not Groovy, expands it.
docker.enabled    = true
docker.runOptions = '-u $(id -u):$(id -g)'
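This provides:
- A QC pipeline template that filters cells and genes and writes per-sample metrics as JSON
- A spatial decomposition workflow skeleton that outputs per-spot cell-type proportions (the decomposition step is a placeholder to be replaced with an actual method; no evaluation metrics are computed yet)
- A configuration template with parameters, per-label CPU, memory, and time limits, and Docker enabled
- A starting point for monitoring and resource tracking: enable Nextflow's `trace`, `report`, and `timeline` scopes in the configuration to record resource usage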