XYZ1998 committed on
Commit
9cf3910
·
verified ·
1 Parent(s): 9fc22b5

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +72 -1
  2. pyproject.toml +21 -0
  3. src/halo/__init__.py +2 -0
  4. src/halo/cli.py +112 -0
  5. src/halo/pipeline.py +213 -0
README.md CHANGED
@@ -1 +1,72 @@
1
- This is the model Halo for CellposeSAM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ library_name: halo
4
+ pipeline_tag: image-segmentation
5
+ tags:
6
+ - spatial
7
+ - xenium
8
+ - cell-segmentation
9
+ - cellpose
10
+ - microscopy
11
+ - bioimage
12
+ ---
13
+
14
+ # Halo
15
+
16
+ Halo is a lightweight pipeline that takes a Xenium dataset folder, builds a 2-channel preprocessed image (DAPI + transcript density), runs Cellpose with the pretrained `Halo` model, and writes a cell mask file.
17
+
18
+ ## Model Description
19
+
20
+ Halo is a wrapper pipeline around Xenium preprocessing and Cellpose inference. It is intended for whole-image inference without tiling.
21
+
22
+ ## Intended Use
23
+
24
+ - Xenium DAPI + transcript density preprocessing
25
+ - Whole-image cell segmentation using Cellpose
26
+
27
+ ## Inputs
28
+
29
+ - Xenium dataset directory containing morphology images and transcript tables
30
+ - DAPI image auto-detected from `morphology_focus/ch0000_dapi.ome.tif` or `morphology.ome.tif`
31
+
32
+ ## Outputs
33
+
34
+ - `halo_processed.tiff` (2-channel DAPI + transcript density)
35
+ - `cell_masks.npy` (default) or `cell_masks.tiff`
36
+
37
+ ## Usage
38
+
39
+ Install (editable):
40
+
41
+ ```bash
42
+ pip install -e /path/to/Halo
43
+ ```
44
+
45
+ Run:
46
+
47
+ ```bash
48
+ halo /path/to/xenium_dataset \
49
+ --out-dir /path/to/output \
50
+ --mask-format npy
51
+ ```
52
+
53
+ If `--out-dir` is omitted, outputs are written to the current working directory.
54
+
55
+ ## Parameters
56
+
57
+ - `--mask-format`: output mask format, either `npy` (default) or `tiff`
58
+ - `--processed-out` and `--mask-out` to override output filenames
59
+ - `--cpu` to force CPU inference
60
+
61
+ ## Limitations
62
+
63
+ - Full-image inference can require substantial RAM and GPU memory on large Xenium images
64
+ - Assumes Xenium coordinate system and transcript columns `x`, `y`, `qv`, and `feature_name`
65
+
66
+ ## Citation
67
+
68
+ If you use this pipeline in academic work, please cite Cellpose and Xenium references appropriate to your study.
69
+
70
+ ## Contact
71
+
72
+ For questions or improvements, open an issue in the repository.
pyproject.toml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "Halo"
7
+ version = "0.1.0"
8
+ description = "Xenium preprocessing and Cellpose XeniumSeg pipeline"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = {text = "MIT"}
12
+ authors = [{name = "Halo"}]
13
+
14
+ [project.scripts]
15
+ halo = "halo.cli:main"
16
+
17
+ [tool.setuptools]
18
+ package-dir = {"" = "src"}
19
+
20
+ [tool.setuptools.packages.find]
21
+ where = ["src"]
src/halo/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __all__ = ["__version__"]
2
+ __version__ = "0.1.0"
src/halo/cli.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ from pathlib import Path
6
+
7
+ from .pipeline import run_pipeline
8
+
9
+
10
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the Halo command-line interface.

    The parser takes one positional argument (the Xenium dataset root) and a
    set of optional flags controlling output locations, the mask format,
    preprocessing parameters and whether inference is forced onto the CPU.

    Returns:
        A configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description="Halo: Xenium preprocessing + Cellpose XeniumSeg pipeline"
    )
    parser.add_argument(
        "xenium_dir", type=Path, help="Path to Xenium dataset root directory"
    )

    # Optional path-valued flags; each one defaults to None so the caller can
    # tell "not given" apart from an explicit path.
    path_flags = (
        ("--out-dir", "Directory for outputs (default: current working directory)"),
        ("--dapi", "Optional path to DAPI image (overrides auto-detect)"),
        ("--processed-out", "Optional path for the processed 2-channel TIFF"),
        ("--mask-out", "Optional path for the output cell mask"),
    )
    for flag, help_text in path_flags:
        parser.add_argument(flag, type=Path, default=None, help=help_text)

    parser.add_argument(
        "--mask-format",
        choices=["npy", "tiff"],
        default="npy",
        help="Output mask format (default: npy)",
    )

    # Numeric preprocessing parameters: (flag, converter, default, help text).
    numeric_flags = (
        ("--quantile", float, 0.995, "Upper quantile for clipping DAPI intensity"),
        ("--sigma", float, 2.5, "Gaussian sigma for transcript density smoothing"),
        ("--pixel-size", float, 0.2125, "Microns per pixel for transcript binning"),
        ("--chunk-size", int, 1_000_000, "Transcripts processed per chunk"),
        ("--qv-min", int, 20, "Minimum transcript QV to keep"),
    )
    for flag, converter, default_value, help_text in numeric_flags:
        parser.add_argument(
            flag, type=converter, default=default_value, help=help_text
        )

    parser.add_argument(
        "--cpu",
        action="store_true",
        help="Force CPU inference (disable GPU)",
    )
    return parser
85
+
86
+
87
def main() -> None:
    """Entry point for the ``halo`` console script.

    Parses the command line, runs the pipeline with the parsed options and
    prints the path of the saved cell mask.
    """
    args = build_parser().parse_args()

    # Without --out-dir, outputs go to the current working directory.
    out_dir = Path.cwd() if args.out_dir is None else args.out_dir

    pipeline_options = dict(
        xenium_dir=args.xenium_dir,
        out_dir=out_dir,
        dapi_path=args.dapi,
        processed_out=args.processed_out,
        mask_out=args.mask_out,
        mask_format=args.mask_format,
        quantile=args.quantile,
        sigma=args.sigma,
        pixel_size=args.pixel_size,
        chunk_size=args.chunk_size,
        qv_min=args.qv_min,
        # --cpu is a negative switch: GPU is used unless it is given.
        use_gpu=not args.cpu,
    )
    mask_path = run_pipeline(**pipeline_options)

    print(f"✓ Saved cell mask to {mask_path}")


if __name__ == "__main__":
    main()
src/halo/pipeline.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+ from typing import Optional, Tuple
6
+
7
+ import numpy as np
8
+ from scipy.ndimage import gaussian_filter
9
+ import tifffile as tiff
10
+ from spatialdata_io import xenium
11
+ from skimage.draw import polygon
12
+ from cellpose import models
13
+
14
# Default image resolution, in microns per pixel, used to convert transcript
# and boundary coordinates into pixel indices.
DEFAULT_PIXEL_SIZE = 0.2125

# Default number of transcripts materialised per chunk while binning.
DEFAULT_CHUNK_SIZE = 1_000_000

# Patterns matched against transcript ``feature_name`` values; matching
# transcripts are dropped before the density image is built.
BAD_PATTERNS = (
    "UnassignedCodeword",
    "NegControlCodeword",
    "NegControlProbe",
    "BLANK",
)
23
+
24
+
25
def find_dapi_image(xenium_dir: Path) -> Path:
    """Locate the DAPI image inside a Xenium dataset directory.

    Candidates are checked in order of preference and the first one that
    exists is returned:
      1) morphology_focus/ch0000_dapi.ome.tif, then .ome.tiff
      2) morphology.ome.tif, then .ome.tiff

    Args:
        xenium_dir: Root directory of the Xenium dataset.

    Returns:
        Path of the first existing candidate.

    Raises:
        FileNotFoundError: If none of the candidate files exists.
    """
    focus_dir = xenium_dir / "morphology_focus"
    candidates = (
        focus_dir / "ch0000_dapi.ome.tif",
        focus_dir / "ch0000_dapi.ome.tiff",
        xenium_dir / "morphology.ome.tif",
        xenium_dir / "morphology.ome.tiff",
    )
    match = next((path for path in candidates if path.exists()), None)
    if match is not None:
        return match
    raise FileNotFoundError(
        "Could not find DAPI image. Tried morphology_focus/ch0000_dapi.ome.tif(f) "
        "and morphology.ome.tif(f) under the Xenium dataset root."
    )
45
+
46
+
47
def load_dapi(img_path: Path) -> np.ndarray:
    """Read a DAPI image from disk and return a single 2D plane.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The image itself when it is 2D, or the first plane along axis 0 when
        it is 3D.

    Raises:
        ValueError: If the image is neither 2D nor 3D.
    """
    image = tiff.imread(str(img_path))
    n_dims = image.ndim
    if n_dims not in (2, 3):
        raise ValueError(f"Unexpected image ndim={image.ndim}; expected 2D or 3D")
    # NOTE(review): for 3D input only index 0 of the leading axis is kept;
    # confirm that this is the intended plane/channel for the data in use.
    return image if n_dims == 2 else image[0, :, :]
54
+
55
+
56
def build_transcript_density(
    xenium_path: Path,
    shape_hw: Tuple[int, int],
    pixel_size: float = DEFAULT_PIXEL_SIZE,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
    qv_min: int = 20,
    sigma: float = 2.5,
) -> np.ndarray:
    """Build a smoothed per-pixel transcript count image.

    Transcripts are loaded from the Xenium dataset, filtered by quality value
    and feature name, binned onto a ``shape_hw`` pixel grid and finally
    smoothed with a Gaussian filter.

    Args:
        xenium_path: Root directory of the Xenium dataset.
        shape_hw: ``(height, width)`` of the output image in pixels.
        pixel_size: Divisor converting transcript ``x``/``y`` values to pixel
            indices (microns per pixel).
        chunk_size: Number of transcript rows materialised per iteration.
        qv_min: Transcripts with ``qv`` below this value are discarded.
        sigma: Sigma passed to ``gaussian_filter``.

    Returns:
        A float32 array of shape ``shape_hw`` holding the smoothed counts.

    Raises:
        KeyError: If the transcripts table lacks any of the columns
            ``x``, ``y``, ``qv`` or ``feature_name``.
    """
    height, width = shape_hw

    sdata = xenium(str(xenium_path), morphology_focus=False)
    transcripts = sdata.points["transcripts"]

    required_cols = {"x", "y", "qv", "feature_name"}
    missing = required_cols - set(transcripts.columns)
    if missing:
        raise KeyError(f"Missing expected columns in transcripts table: {missing}")

    # One alternation pattern covering every excluded feature-name pattern.
    bad_regex = "|".join(BAD_PATTERNS)
    passes_qv = transcripts["qv"] >= qv_min
    is_excluded = transcripts["feature_name"].astype(str).str.contains(bad_regex)
    kept = transcripts[(passes_qv) & (~is_excluded)]

    # lengths=True resolves chunk lengths so the array can be sliced by row.
    xy_lazy = kept[["x", "y"]].to_dask_array(lengths=True)

    counts = np.zeros((height, width), dtype=np.uint32)
    total_rows = xy_lazy.shape[0]
    for start in range(0, total_rows, chunk_size):
        # Only one chunk of coordinates is held in memory at a time.
        block = xy_lazy[start:start + chunk_size].compute()
        col_idx = np.rint(block[:, 0] / pixel_size).astype(np.int32)
        row_idx = np.rint(block[:, 1] / pixel_size).astype(np.int32)

        # Drop transcripts that fall outside the image grid.
        inside = (col_idx >= 0) & (col_idx < width) & (row_idx >= 0) & (row_idx < height)
        # np.add.at accumulates correctly when a pixel occurs more than once.
        np.add.at(counts, (row_idx[inside], col_idx[inside]), 1)

    return gaussian_filter(counts.astype(np.float32), sigma=sigma)
97
+
98
+
99
def save_true_cell_mask(
    xenium_path: Path,
    shape_hw: Tuple[int, int],
    out_path: Path,
    pixel_size: float = DEFAULT_PIXEL_SIZE,
) -> None:
    """Rasterise the dataset's cell boundaries into a label image and save it.

    Each geometry in ``sdata.shapes["cell_boundaries"]`` is filled into an
    int32 image of shape ``shape_hw``. Labels start at 1 so that every cell is
    distinguishable from the zero-valued background; where polygons overlap,
    the later one overwrites the earlier one.

    Args:
        xenium_path: Root directory of the Xenium dataset.
        shape_hw: ``(height, width)`` of the label image in pixels.
        out_path: Destination passed to ``np.save``.
        pixel_size: Divisor converting boundary coordinates to pixel indices
            (microns per pixel).
    """
    h, w = shape_hw
    sdata = xenium(str(xenium_path), morphology_focus=False)
    boundaries = sdata.shapes["cell_boundaries"]["geometry"]
    label_img = np.zeros((h, w), dtype=np.int32)

    # Start labels at 1: the image is zero-initialised, so giving the first
    # cell label 0 would make it indistinguishable from background.
    for label, geom in enumerate(boundaries, start=1):
        # NOTE(review): assumes each geometry exposes ``.exterior`` (i.e. is a
        # simple polygon) — confirm for the datasets in use.
        coords = np.array(geom.exterior.coords)
        rows = np.round(coords[:, 1] / pixel_size).astype(int)
        cols = np.round(coords[:, 0] / pixel_size).astype(int)
        # shape=(h, w) clips the filled region to the image bounds.
        rr, cc = polygon(rows, cols, shape=(h, w))
        label_img[rr, cc] = label

    np.save(str(out_path), label_img)
116
+
117
+
118
def preprocess(
    img_path: Path,
    xenium_path: Path,
    out_path: Path,
    quantile: float = 0.995,
    sigma: float = 2.5,
    pixel_size: float = DEFAULT_PIXEL_SIZE,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
    qv_min: int = 20,
) -> np.ndarray:
    """Build the 2-channel (DAPI, transcript density) image and write it out.

    The DAPI plane is clipped to ``[0, quantile-th value]``; both the clipped
    DAPI and the transcript density are divided by their own maximum (when it
    is positive) and stacked along a new last axis. A uint16 copy scaled by
    65535 is written to ``out_path``.

    Args:
        img_path: Path of the DAPI image.
        xenium_path: Root directory of the Xenium dataset.
        out_path: Destination of the processed TIFF.
        quantile: Quantile of the DAPI intensities used as the upper clip.
        sigma: Gaussian sigma for transcript density smoothing.
        pixel_size: Microns per pixel for transcript binning.
        chunk_size: Transcripts processed per chunk.
        qv_min: Minimum transcript QV to keep.

    Returns:
        The float32 stack with DAPI in channel 0 and density in channel 1.
    """
    dapi = load_dapi(img_path)

    # Clip bright outliers before normalising.
    upper_bound = np.quantile(dapi, quantile)
    dapi = dapi.clip(0, upper_bound)
    height, width = dapi.shape

    density = build_transcript_density(
        xenium_path,
        (height, width),
        pixel_size=pixel_size,
        chunk_size=chunk_size,
        qv_min=qv_min,
        sigma=sigma,
    )

    # Scale each channel by its maximum; an all-zero channel is left as is
    # to avoid dividing by zero.
    dapi_peak = dapi.max()
    dapi_scaled = dapi / dapi_peak if dapi_peak > 0 else dapi
    density_peak = density.max()
    density_scaled = density / density_peak if density_peak > 0 else density

    stack = np.stack((dapi_scaled, density_scaled), axis=-1).astype(np.float32)
    as_u16 = (stack * 65535).astype(np.uint16)
    tiff.imwrite(str(out_path), as_u16, photometric="minisblack", metadata=None)
    return stack
150
+
151
+
152
def run_cellpose(
    img: np.ndarray,
    model_name: str = "Halo",
    use_gpu: bool = True,
    channels: Tuple[int, int] = (1, 0),
) -> np.ndarray:
    """Run a Cellpose model on a single image and return its mask.

    Args:
        img: Image to segment.
        model_name: Value passed as ``pretrained_model`` to ``CellposeModel``.
        use_gpu: Value passed as ``gpu`` to ``CellposeModel``.
        channels: Channel pair forwarded to ``model.eval`` as a list.

    Returns:
        The mask predicted for ``img``.
    """
    segmenter = models.CellposeModel(gpu=use_gpu, pretrained_model=model_name)
    # eval is given a one-element batch; only its first output (the masks)
    # is used, and the single image's mask is taken out of the batch.
    outputs = segmenter.eval([img], channels=list(channels), normalize=True)
    batch_masks = outputs[0]
    return batch_masks[0]
161
+
162
+
163
def save_mask(mask: np.ndarray, out_path: Path, fmt: str) -> None:
    """Write a label mask to ``out_path`` in the requested format.

    Args:
        mask: Label image to save.
        out_path: Destination file. The file is written at exactly this path,
            whatever its suffix.
        fmt: ``"npy"``, ``"tiff"`` or ``"tif"`` (case-insensitive).

    Raises:
        ValueError: If ``fmt`` is not one of the supported formats.
    """
    fmt = fmt.lower()
    if fmt == "npy":
        # np.save appends ".npy" to filenames lacking that suffix, which would
        # put the file somewhere other than the path callers report. Writing
        # through an open handle keeps the destination exactly ``out_path``.
        with open(out_path, "wb") as handle:
            np.save(handle, mask)
    elif fmt in ("tiff", "tif"):
        # use uint32 to preserve labels
        tiff.imwrite(str(out_path), mask.astype(np.uint32), photometric="minisblack")
    else:
        raise ValueError("mask format must be 'npy' or 'tiff'")
172
+
173
+
174
def run_pipeline(
    xenium_dir: Path,
    out_dir: Path,
    dapi_path: Optional[Path] = None,
    processed_out: Optional[Path] = None,
    mask_out: Optional[Path] = None,
    mask_format: str = "npy",
    quantile: float = 0.995,
    sigma: float = 2.5,
    pixel_size: float = DEFAULT_PIXEL_SIZE,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
    qv_min: int = 20,
    use_gpu: bool = True,
) -> Path:
    """Run preprocessing and Cellpose inference for one Xenium dataset.

    Args:
        xenium_dir: Root directory of the Xenium dataset.
        out_dir: Directory for default outputs; created if missing.
        dapi_path: DAPI image to use; auto-detected under ``xenium_dir`` when
            ``None``.
        processed_out: Destination of the processed 2-channel TIFF; defaults
            to ``out_dir / "halo_processed.tiff"``.
        mask_out: Destination of the cell mask; defaults to
            ``cell_masks.npy`` or ``cell_masks.tiff`` in ``out_dir``.
        mask_format: ``"npy"``, ``"tiff"`` or ``"tif"`` (case-insensitive).
        quantile: Upper quantile for clipping DAPI intensity.
        sigma: Gaussian sigma for transcript density smoothing.
        pixel_size: Microns per pixel for transcript binning.
        chunk_size: Transcripts processed per chunk.
        qv_min: Minimum transcript QV to keep.
        use_gpu: Whether Cellpose should use the GPU.

    Returns:
        The path the cell mask was saved to.

    Raises:
        ValueError: If ``mask_format`` is not supported.
        FileNotFoundError: If no DAPI image can be auto-detected.
    """
    # Normalise and validate the format before any expensive work, so a bad
    # value fails immediately instead of after preprocessing and inference.
    fmt = mask_format.lower()
    if fmt not in ("npy", "tiff", "tif"):
        raise ValueError("mask format must be 'npy' or 'tiff'")

    xenium_dir = xenium_dir.resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    if dapi_path is None:
        dapi_path = find_dapi_image(xenium_dir)

    if processed_out is None:
        processed_out = out_dir / "halo_processed.tiff"

    if mask_out is None:
        # Use the normalised format so the default filename always matches
        # the data that will actually be written.
        mask_out = out_dir / ("cell_masks.npy" if fmt == "npy" else "cell_masks.tiff")

    # Explicit output paths may point outside out_dir; make sure their parent
    # directories exist so the writes cannot fail after the long computation.
    for target in (processed_out, mask_out):
        Path(target).parent.mkdir(parents=True, exist_ok=True)

    img = preprocess(
        img_path=dapi_path,
        xenium_path=xenium_dir,
        out_path=processed_out,
        quantile=quantile,
        sigma=sigma,
        pixel_size=pixel_size,
        chunk_size=chunk_size,
        qv_min=qv_min,
    )

    masks = run_cellpose(img, model_name="Halo", use_gpu=use_gpu, channels=(1, 0))
    save_mask(masks, mask_out, fmt)
    return mask_out