| import argparse |
| import numpy as np |
| import openslide |
| import os |
| import pandas as pd |
| import time |
| import h5py |
| from PIL import Image |
|
|
| |
# Disable PIL's decompression-bomb guard: whole-slide image previews routinely
# exceed the default pixel limit and would otherwise raise DecompressionBombError.
Image.MAX_IMAGE_PIXELS = None
|
|
| |
|
|
| from .wsi_core.WholeSlideImage import WholeSlideImage |
| from .wsi_core.wsi_utils import StitchCoords |
| from .wsi_core.batch_process_utils import initialize_df |
|
|
| |
def str_to_bool(v):
    """Convert a string (or bool) to a boolean.

    Truthy inputs:  '1', 'true', 't', 'yes', 'y', 'on'
    Falsy inputs:   '0', 'false', 'f', 'no', 'n', 'off'
    Matching is case-insensitive and ignores surrounding whitespace.
    Actual bools are passed through unchanged.

    Raises:
        argparse.ArgumentTypeError: for None or any unrecognized string.
    """
    if isinstance(v, bool):
        return v
    if v is None:
        raise argparse.ArgumentTypeError('Boolean value expected.')
    normalized = str(v).strip().lower()
    if normalized in {'1', 'true', 't', 'yes', 'y', 'on'}:
        return True
    if normalized in {'0', 'false', 'f', 'no', 'n', 'off'}:
        return False
    raise argparse.ArgumentTypeError("Boolean value expected (e.g., 'true' or 'false').")
|
|
def stitching(file_path, wsi_object, downscale = 64):
    """Stitch patch coordinates from *file_path* back onto a downscaled image.

    Returns:
        (heatmap, elapsed_seconds) where heatmap is the stitched preview image.
    """
    t0 = time.time()
    heatmap = StitchCoords(file_path, wsi_object, downscale=downscale,
                           bg_color=(0, 0, 0), alpha=-1, draw_grid=False)
    return heatmap, time.time() - t0
|
|
def segment(WSI_object, seg_params = None, filter_params = None, mask_file = None):
    """Run tissue segmentation (or load a precomputed mask) and time it.

    If *mask_file* is given, segmentation is initialized from that file;
    otherwise segmentTissue is invoked with the supplied parameter dicts.
    NOTE: seg_params is assumed to be a dict when no mask file is provided.

    Returns:
        (WSI_object, elapsed_seconds)
    """
    t0 = time.time()
    if mask_file is None:
        WSI_object.segmentTissue(**seg_params, filter_params=filter_params)
    else:
        WSI_object.initSegmentation(mask_file)
    return WSI_object, time.time() - t0
|
|
def patching(WSI_object, patient_id, **kwargs):
    """Extract patch coordinates for every tissue contour and time the call.

    Returns:
        (h5 file path produced by process_contours, elapsed_seconds)
    """
    t0 = time.time()
    h5_path = WSI_object.process_contours(patient_id, **kwargs)
    return h5_path, time.time() - t0
|
|
|
|
| def _h5_is_complete(h5_path: str) -> bool: |
| """Check if an HDF5 file has a 'complete' marker and at least one coord. |
| |
| This avoids auto-skipping for partially written files from interrupted runs. |
| """ |
| if not os.path.isfile(h5_path): |
| return False |
| try: |
| with h5py.File(h5_path, 'r') as f: |
| complete = bool(f.attrs.get('complete', False)) |
| has_coords = 'coords' in f and len(f['coords']) > 0 |
| return complete and has_coords |
| except Exception: |
| |
| return False |
|
|
|
|
def _get_mpp_from_properties(wsi) -> tuple:
    """Try to infer MPP (microns-per-pixel) from slide properties.

    Order of checks:
    1) openslide.mpp-x (standard key)
    2) aperio.MPP (Aperio-specific)
    3) tiff.XResolution with tiff.ResolutionUnit (convert to microns)
    4) openslide.objective-power -> heuristic mapping (10x=1.0, 20x=0.5, 40x=0.25)

    Returns:
        (mpp: float | None, source: str) where source indicates the origin of the value.
    """
    # Property access on a broken slide handle can raise; fall back to empty.
    try:
        props = getattr(wsi, 'properties', {})
    except Exception:
        props = {}

    # 1) Standard OpenSlide key.
    try:
        suspicious_upper_bound = 1000.0
        mpp_val = props.get(openslide.PROPERTY_NAME_MPP_X) or props.get('openslide.mpp-x')
        if mpp_val is not None:
            mpp = float(mpp_val)
            # HACK: some slides report a bogus mpp of exactly 100; remapped here
            # to the common 20x value.  NOTE(review): exact float == comparison —
            # confirm the broken metadata is always exactly 100.0.
            if mpp == 100:
                mpp = 0.5
            if mpp > 0:
                # Values >= 1000 um/px are implausible; fall through to the
                # next metadata source instead of returning them.
                if mpp < suspicious_upper_bound:
                    return mpp, 'openslide.mpp-x'
    except Exception:
        pass

    # 2) Aperio-specific key (no plausibility bound applied on this path).
    try:
        aperio_mpp = props.get('aperio.MPP')
        if aperio_mpp is not None:
            mpp = float(aperio_mpp)
            if mpp > 0:
                return mpp, 'aperio.MPP'
    except Exception:
        pass

    # 3) TIFF resolution tags: convert pixels-per-unit to microns-per-pixel.
    try:
        if 'tiff.XResolution' in props:
            xres = float(props['tiff.XResolution'])
            unit_raw = props.get('tiff.ResolutionUnit', '')
            unit_str = str(unit_raw).strip().lower()
            # ResolutionUnit 2 = inch, 3 = centimeter (TIFF spec numeric codes).
            if unit_str in {'2', 'inch', 'inches'}:
                mpp = 25400.0 / xres
            elif unit_str in {'3', 'centimeter', 'centimetre', 'cm'}:
                mpp = 10000.0 / xres
            else:
                # Unknown/missing unit: assume centimeters.
                mpp = 10000.0 / xres
            if mpp > 0:
                return mpp, 'tiff.XResolution'
    except Exception:
        pass

    # 4) Objective-power heuristic: mpp ~= 10 / magnification
    #    (10x -> 1.0, 20x -> 0.5, 40x -> 0.25).
    try:
        obj = props.get(openslide.PROPERTY_NAME_OBJECTIVE_POWER) or props.get('openslide.objective-power')
        if obj is not None:
            obj = float(obj)
            if obj > 0:
                mpp = 10.0 / obj
                if mpp > 0:
                    return mpp, 'objective-power-heuristic'
    except Exception:
        pass

    # No usable metadata found.
    return None, 'unknown'
|
|
|
|
def _infer_or_assume_mpp(wsi, assumed_mpp: float = 0.5) -> tuple:
    """Resolve the slide's microns-per-pixel, falling back to *assumed_mpp*.

    Centralizing MPP inference here keeps patch sizing and coordinate scaling
    consistent.  Returns (mpp, source_label); source is 'assumed' on fallback.
    """
    inferred, origin = _get_mpp_from_properties(wsi)
    # Short-circuit order matters: np.isfinite must not see None.
    usable = inferred is not None and np.isfinite(inferred) and inferred > 0
    if not usable:
        return float(assumed_mpp), 'assumed'
    return float(inferred), origin
|
|
|
|
| def _extract_patient_wsi_ids(slide_id: str): |
| """Return (patient_id, wsi_id) from a relative slide path without extension. |
| |
| - Preserves full parent directory path so subtypes (e.g., "AML/") are kept. |
| - For non-nested ids (no '/'), both patient_id and wsi_id fall back to slide_id. |
| """ |
| |
| if '/' not in slide_id: |
| return slide_id, slide_id |
| |
| parts = slide_id.split('/') |
| wsi_id = parts[-1] |
| patient_id = '/'.join(parts[:-1]) if len(parts) > 1 else wsi_id |
| return patient_id, wsi_id |
|
|
|
|
def seg_and_patch(source, save_dir, patch_save_dir, mask_save_dir, stitch_save_dir,
                  patch_size = 256, step_size = 256,
                  seg_params = None,
                  filter_params = None,
                  vis_params = None,
                  patch_params = None,
                  patch_level = 0,
                  use_default_params = False,
                  seg = False, save_mask = True,
                  stitch= False,
                  patch = False, auto_skip=True, process_list = None, rank=0, world_size=1,
                  assumed_mpp: float = 0.5):
    """Discover WSIs under *source*, then segment tissue, extract patch
    coordinates, and optionally stitch previews for each slide.

    Outputs:
      - patch coordinate .h5 files under patch_save_dir/<patient_id>/
      - segmentation mask previews (jpg, png fallback) under mask_save_dir/
      - stitched heatmaps under stitch_save_dir/<patient_id>/
      - a per-rank process-list CSV in save_dir tracking per-slide status

    Args:
        source: root folder scanned recursively for WSI files.
        save_dir, patch_save_dir, mask_save_dir, stitch_save_dir: output dirs.
        patch_size, step_size: nominal sizes at 0.5 MPP; scaled per slide
            using the inferred MPP before patching.
        seg_params, filter_params, vis_params, patch_params: parameter dicts;
            None selects the defaults constructed below (fresh per call —
            mutable defaults in the signature would be shared across calls).
        patch_level: pyramid level at which patches are extracted.
        use_default_params: if True, start from the dicts above instead of the
            per-slide values stored in the process-list CSV.
        seg / patch / stitch / save_mask: stage toggles.
        auto_skip: skip slides whose complete .h5 output already exists.
        process_list: optional CSV of slides + per-slide parameters.
        rank, world_size: round-robin shard candidate slides across jobs.
        assumed_mpp: fallback microns-per-pixel when metadata lacks MPP.

    Returns:
        (avg_seg_time, avg_patch_time) in seconds per slide for this rank.
    """
    if seg_params is None:
        seg_params = {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False,
                      'keep_ids': 'none', 'exclude_ids': 'none', 'seg_downsample': 1.0}
    if filter_params is None:
        filter_params = {'a_t':100, 'a_h': 16, 'max_n_holes':8}
    if vis_params is None:
        vis_params = {'vis_level': -1, 'line_thickness': 500}
    if patch_params is None:
        patch_params = {'use_padding': True, 'contour_fn': 'four_pt'}

    allowed_suffixes = ('.svs', '.tif', '.tiff', '.ndpi', '.mrxs', '.bif')

    def is_wsi_candidate(name: str) -> bool:
        """Return True if the filename ends with an allowed WSI suffix (WSI formats only).
        Strict endswith prevents including non-WSI sidecar files (e.g., *.svs.cloud_transfer.*) and excludes JPGs.
        """
        lname = name.lower()
        return lname.endswith(allowed_suffixes)

    # Recursively collect slide paths relative to `source`, normalized to
    # forward slashes so slide ids are stable across platforms.
    slides = []
    for root, _dirs, files in os.walk(source):
        for file in sorted(files):
            if is_wsi_candidate(file):
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, start=source)
                slides.append(rel_path.replace('\\', '/'))

    # Known-problematic slides, excluded by substring match.
    # NOTE(review): dataset-specific hard-coded exclusions; consider moving to
    # a config file.
    excluded_substrings = [
        '18ed4f5f-5f1c-4f87-a79c-e8d2b6f167b0/TCGA-19-1389-01Z-00-DX1.bd925898-9fb1-4c7c-81b6-9b492e956ca1.svs',
        '035a89f8-a30c-4039-8424-0c38a32546ff/TCGA-06-1086-01Z-00-DX2.e1961f1f-a823-4775-acf7-04a46f05e15e.svs',
        'patient_103_node_1.tif',
        '11327473-2921-4916-a111-89d0eda6be8a/TCGA-A8-A06U-01A-01-TS1.63824040-373f-4c6c-a74e-881c127567a6.svs',
        '9c393d1e-c0ef-453f-882c-cd5541fa07fd/TCGA-CV-7242-11A-01-TS1.1838afb1-9eee-4a70-9ae3-50e3ab45e242.svs',
        'AML/C3L-06352-41.svs',
        'AML/C3L-05870-42.svs',
        'CM/C3L-02622-28.svs',
        'LG-0252_B.svs',
        '3790f55cad63053e956fb73027179707.tiff',
        'Angiomatous meningioma/a1982bd3-357f-11eb-aec7-001a7dda7111.ndpi',
        'a1982bd3-357f-11eb-aec7-001a7dda7111.ndpi',
    ]
    for bad in excluded_substrings:
        slides = [s for s in slides if bad not in s]

    # Base name for the output process-list CSV.
    if process_list is None:
        base_list_file_name = 'process_list_autogen.csv'
    else:
        base_list_file_name = os.path.basename(process_list)

    if process_list is None:
        full_df = initialize_df(slides, seg_params, filter_params, vis_params, patch_params)
    else:
        full_df = pd.read_csv(process_list)
        full_df = initialize_df(full_df, seg_params, filter_params, vis_params, patch_params)

    # Normalize sharding inputs defensively.
    rank = 0 if rank is None else int(rank)
    world_size = int(world_size) if world_size else 1
    if world_size < 1:
        world_size = 1

    # Candidate selection: an explicit 'status' column ('tbp' = to be
    # processed) takes priority; otherwise fall back to the 'process' flag,
    # else process everything.
    if 'status' in full_df.columns:
        candidate_mask = full_df['status'].astype(str).eq('tbp')
        full_df['process'] = 0
        full_df.loc[candidate_mask, 'process'] = 1
    else:
        if 'process' in full_df.columns:
            candidate_mask = full_df['process'] == 1
        else:
            candidate_mask = pd.Series([True] * len(full_df), index=full_df.index)

    process_candidates = full_df[candidate_mask]
    total_candidates = len(process_candidates)
    print("total:", total_candidates)

    # Round-robin shard the candidates across ranks; out-of-range ranks get nothing.
    candidate_indices = list(process_candidates.index)
    if rank < 0 or rank >= world_size:
        assigned_indices = []
    elif world_size > 1:
        assigned_indices = [idx for order, idx in enumerate(candidate_indices) if order % world_size == rank]
    else:
        assigned_indices = candidate_indices

    assigned_count = len(assigned_indices)
    print(f"rank {rank}/{world_size} assigned slides: {assigned_count}")

    df = full_df.loc[assigned_indices].copy()
    process_stack = df[df['process'] == 1]
    total = len(process_stack)

    # Per-rank CSV name; add a timestamp suffix if the tentative name exists
    # so an earlier run's list is never clobbered.
    base_stem, base_ext = os.path.splitext(base_list_file_name)
    if not base_ext:
        base_ext = '.csv'
    if world_size > 1:
        tentative_name = f'{base_stem}_rank{rank:04d}{base_ext}'
    else:
        tentative_name = f'{base_stem}{base_ext}'

    list_file_path = os.path.join(save_dir, tentative_name)
    if os.path.exists(list_file_path):
        ts = time.strftime('%Y%m%d_%H%M%S')
        if world_size > 1:
            list_file_name = f'{base_stem}_rank{rank:04d}_{ts}{base_ext}'
        else:
            list_file_name = f'{base_stem}_{ts}{base_ext}'
    else:
        list_file_name = tentative_name

    if total == 0:
        print(f"rank {rank} has no slides to process.")

    # Legacy CSVs store tissue area under column 'a' measured at a 512x512
    # reference; converted per slide in the loop below.
    legacy_support = 'a' in df.keys()
    if legacy_support:
        print('detected legacy segmentation csv file, legacy support enabled')
        df = df.assign(**{'a_t': np.full((len(df)), int(filter_params['a_t']), dtype=np.uint32),
                          'a_h': np.full((len(df)), int(filter_params['a_h']), dtype=np.uint32),
                          'max_n_holes': np.full((len(df)), int(filter_params['max_n_holes']), dtype=np.uint32),
                          'line_thickness': np.full((len(df)), int(vis_params['line_thickness']), dtype=np.uint32),
                          'contour_fn': np.full((len(df)), patch_params['contour_fn'])})

    seg_times = 0.
    patch_times = 0.
    stitch_times = 0.

    # Slides known to crash segmentation; marked failed_seg and skipped.
    # (Hoisted out of the loop: it is constant across iterations.)
    skip_as_failed_seg = {
        'a7a607ab-7b7a-469d-bffb-6003cce2d6ae/TCGA-UZ-A9PQ-01Z-00-DX1.C2CB0E94-2548-4399-BCAB-E4D556D533EF.svs'
    }

    for i in range(total):
        # Checkpoint the process list every iteration so progress survives crashes.
        df.to_csv(os.path.join(save_dir, list_file_name), index=False)
        idx = process_stack.index[i]
        slide = process_stack.loc[idx, 'slide_id']
        print("\n\nprogress: {:.2f}, {}/{}".format(i/total, i, total))
        print('processing {}'.format(slide))

        if slide in skip_as_failed_seg:
            print(f"{slide} is known to fail; marking as failed_seg and skipping.")
            df.loc[idx, 'status'] = 'failed_seg'
            df.loc[idx, 'process'] = 0
            continue

        df.loc[idx, 'process'] = 0
        slide_id, _ = os.path.splitext(slide)

        if auto_skip:
            # Check the canonical <patient>/<wsi>.h5 layout plus two legacy
            # layouts; only a file with the completion marker counts as done.
            legacy_path = os.path.join(patch_save_dir, slide_id + '.h5')
            nested_path = os.path.join(patch_save_dir, slide_id, slide_id + '.h5')
            patient_id, wsi_id = _extract_patient_wsi_ids(slide_id)
            canonical_path = os.path.join(patch_save_dir, patient_id, wsi_id + '.h5')

            existing_paths = [p for p in [canonical_path, legacy_path, nested_path] if os.path.isfile(p)]
            if existing_paths:
                candidate = canonical_path if os.path.isfile(canonical_path) else existing_paths[0]
                if _h5_is_complete(candidate):
                    print('{} already exist in destination location (complete), skipped'.format(slide_id))
                    df.loc[idx, 'status'] = 'already_exist'
                    continue
                else:
                    # Partial output from an interrupted run: remove and redo.
                    print(f"found incomplete h5 for {slide_id}: {candidate} -> removing and regenerating")
                    try:
                        os.remove(candidate)
                    except Exception as e:
                        print(f"warning: failed to remove incomplete file {candidate}: {e}")

        full_path = os.path.join(source, slide)
        WSI_object = WholeSlideImage(full_path)

        if use_default_params:
            current_vis_params = vis_params.copy()
            current_filter_params = filter_params.copy()
            current_seg_params = seg_params.copy()
            current_patch_params = patch_params.copy()
        else:
            # Parameters are pulled per-slide from the dataframe row below.
            current_vis_params = {}
            current_filter_params = {}
            current_seg_params = {}
            current_patch_params = {}

        for key in vis_params.keys():
            if legacy_support and key == 'vis_level':
                df.loc[idx, key] = -1
            current_vis_params.update({key: df.loc[idx, key]})

        for key in filter_params.keys():
            if legacy_support and key == 'a_t':
                # Convert legacy area 'a' (at the 512x512 reference) to the
                # current seg_level's pixel scale.
                old_area = df.loc[idx, 'a']
                seg_level = df.loc[idx, 'seg_level']
                scale = WSI_object.level_downsamples[seg_level]
                adjusted_area = int(old_area * (scale[0] * scale[1]) / (512 * 512))
                current_filter_params.update({key: adjusted_area})
                df.loc[idx, key] = adjusted_area
            current_filter_params.update({key: df.loc[idx, key]})

        for key in seg_params.keys():
            if legacy_support and key == 'seg_level':
                df.loc[idx, key] = -1
            current_seg_params.update({key: df.loc[idx, key]})

        for key in patch_params.keys():
            current_patch_params.update({key: df.loc[idx, key]})

        # Resolve vis/seg levels: -1 means auto-pick the pyramid level closest
        # to a 64x downsample (level 0 for single-level slides).
        if current_vis_params['vis_level'] < 0:
            if len(WSI_object.level_dim) == 1:
                current_vis_params['vis_level'] = 0
            else:
                wsi = WSI_object.getOpenSlide()
                best_level = wsi.get_best_level_for_downsample(64)
                current_vis_params['vis_level'] = best_level

        if current_seg_params['seg_level'] < 0:
            if len(WSI_object.level_dim) == 1:
                current_seg_params['seg_level'] = 0
            else:
                wsi = WSI_object.getOpenSlide()
                best_level = wsi.get_best_level_for_downsample(64)
                current_seg_params['seg_level'] = best_level

        # keep_ids / exclude_ids arrive as comma-separated strings; 'none' or
        # empty means no restriction.
        keep_ids = str(current_seg_params['keep_ids'])
        if keep_ids != 'none' and len(keep_ids) > 0:
            str_ids = current_seg_params['keep_ids']
            current_seg_params['keep_ids'] = np.array(str_ids.split(',')).astype(int)
        else:
            current_seg_params['keep_ids'] = []

        exclude_ids = str(current_seg_params['exclude_ids'])
        if exclude_ids != 'none' and len(exclude_ids) > 0:
            str_ids = current_seg_params['exclude_ids']
            current_seg_params['exclude_ids'] = np.array(str_ids.split(',')).astype(int)
        else:
            current_seg_params['exclude_ids'] = []

        w, h = WSI_object.level_dim[current_seg_params['seg_level']]
        resolved_seg_level = current_seg_params['seg_level']
        print('seg_level {}, level_dim {} x {}'.format(resolved_seg_level, w, h))
        if w * h > 4e10:
            # Guard against slides too large to segment in memory.
            print('level_dim {} x {} is likely too large for successful segmentation, aborting'.format(w, h))
            df.loc[idx, 'status'] = 'failed_seg'
            continue

        df.loc[idx, 'vis_level'] = current_vis_params['vis_level']
        df.loc[idx, 'seg_level'] = current_seg_params['seg_level']

        seg_time_elapsed = -1
        if seg:
            try:
                WSI_object, seg_time_elapsed = segment(WSI_object, current_seg_params, current_filter_params)
            except Exception as e:
                # Retry once at a coarser, fixed level; some slides fail at the
                # auto-picked level.
                print(str(e))
                current_seg_params['seg_level'] = 2
                WSI_object, seg_time_elapsed = segment(WSI_object, current_seg_params, current_filter_params)

            if len(WSI_object.contours_tissue) == 0:
                print('failed to extract contours')
                df.loc[idx, 'status'] = 'failed_seg'
                continue

        if save_mask:
            mask = WSI_object.visWSI(**current_vis_params)
            # Optionally shrink the preview by seg_downsample (>1) to save disk.
            seg_ds = current_seg_params.get('seg_downsample', 1)
            try:
                seg_ds = float(seg_ds)
            except (TypeError, ValueError):
                seg_ds = 1
            if seg_ds > 1:
                new_w = max(1, int(mask.width / seg_ds))
                new_h = max(1, int(mask.height / seg_ds))
                mask = mask.resize((new_w, new_h), resample=Image.Resampling.BILINEAR)
            patient_id, wsi_id = _extract_patient_wsi_ids(slide_id)
            os.makedirs(os.path.join(mask_save_dir, patient_id), exist_ok=True)

            try:
                mask_path = os.path.join(mask_save_dir, patient_id, wsi_id+'.jpg')
                mask.save(mask_path)
            except (OSError, ValueError) as e:
                # e.g. RGBA images cannot be written as JPEG.
                print(f"JPG save failed ({e}), using PNG instead for {wsi_id}")
                mask_path = os.path.join(mask_save_dir, patient_id, wsi_id+'.png')
                mask.save(mask_path)

        patch_time_elapsed = -1
        if patch:
            patient_id, wsi_id = _extract_patient_wsi_ids(slide_id)
            os.makedirs(os.path.join(patch_save_dir, patient_id), exist_ok=True)

            print(WSI_object.wsi.properties)
            # Scale the nominal patch/step size (defined at 0.5 MPP) by the
            # slide's actual MPP.  'ver-' runs use the exact float factor;
            # otherwise the factor is rounded to an integer (min 1).
            if 'ver-' in save_dir:
                this_mpp, mpp_src = _infer_or_assume_mpp(WSI_object.wsi, assumed_mpp=assumed_mpp)
                print(f"mpp: {this_mpp} (source={mpp_src})")
                standard_mpp = 0.5
                crop_factor = standard_mpp / this_mpp
                new_patch_size = int(patch_size * crop_factor)
                new_step_size = int(step_size * crop_factor)
                print("patch_size: ", new_patch_size )
            else:
                this_mpp, mpp_src = _infer_or_assume_mpp(WSI_object.wsi, assumed_mpp=assumed_mpp)
                print(f"mpp: {this_mpp} (source={mpp_src})")
                standard_mpp = 0.5
                crop_factor = round(standard_mpp / this_mpp)
                if crop_factor <= 0:
                    crop_factor = 1
                new_patch_size = int(patch_size * crop_factor)
                new_step_size = int(step_size * crop_factor)
                print("patch_size: ", new_patch_size )

            current_patch_params.update({'patch_level': patch_level, 'patch_size': new_patch_size, 'step_size': new_step_size,
                                         'save_path': patch_save_dir})
            file_path, patch_time_elapsed = patching(WSI_object = WSI_object, patient_id=patient_id, **current_patch_params,)

            if file_path and not _h5_is_complete(file_path):
                print(f"warning: generated file missing completion marker: {file_path}")

        stitch_time_elapsed = -1
        if stitch:
            patient_id, wsi_id = _extract_patient_wsi_ids(slide_id)
            file_path = os.path.join(patch_save_dir, patient_id, wsi_id+'.h5')
            if os.path.isfile(file_path):
                heatmap, stitch_time_elapsed = stitching(file_path, WSI_object, downscale=64)

                os.makedirs(os.path.join(stitch_save_dir, patient_id), exist_ok=True)

                try:
                    stitch_path = os.path.join(stitch_save_dir, patient_id, wsi_id+'.jpg')
                    heatmap.save(stitch_path)
                except (OSError, ValueError) as e:
                    print(f"JPG save failed ({e}), using PNG instead for {wsi_id}")
                    stitch_path = os.path.join(stitch_save_dir, patient_id, wsi_id+'.png')
                    heatmap.save(stitch_path)

        print("segmentation took {} seconds".format(seg_time_elapsed))
        print("patching took {} seconds".format(patch_time_elapsed))
        print("stitching took {} seconds".format(stitch_time_elapsed))
        df.loc[idx, 'status'] = 'processed'

        seg_times += seg_time_elapsed
        patch_times += patch_time_elapsed
        stitch_times += stitch_time_elapsed

    # Average over assigned slides; skipped stages contribute their -1 sentinel.
    if total > 0:
        seg_times /= total
        patch_times /= total
        stitch_times /= total
    else:
        seg_times = 0.0
        patch_times = 0.0
        stitch_times = 0.0

    df.to_csv(os.path.join(save_dir, list_file_name), index=False)
    print("average segmentation time in s per slide: {}".format(seg_times))
    print("average patching time in s per slide: {}".format(patch_times))
    print("average stiching time in s per slide: {}".format(stitch_times))

    return seg_times, patch_times
|
|
def get_args_parser():
    """Build the argparse parser for the seg-and-patch CLI.

    Constructed with add_help=False so it can be composed as a parent parser
    by a top-level entry point.
    """
    p = argparse.ArgumentParser(description='seg and patch', add_help=False)

    p.add_argument('--source', type=str,
                   help='path to folder containing raw wsi image files')
    p.add_argument('--step_size', type=int, default=256,
                   help='step_size')
    p.add_argument('--patch_size', type=int, default=256,
                   help='patch_size')
    p.add_argument('--patch', type=str_to_bool, default=True,
                   help="whether to run patching; accepts true/false")
    p.add_argument('--seg', type=str_to_bool, default=True,
                   help="whether to run segmentation; accepts true/false")
    p.add_argument('--stitch', type=str_to_bool, default=False,
                   help="whether to run stitching; accepts true/false")
    p.add_argument('--auto_skip', type=str_to_bool, default=False,
                   help="if true, auto-skip is enabled (skip slides with existing outputs); if false, reprocess even if outputs exist")
    p.add_argument('--save_dir', type=str,
                   help='directory to save processed data')
    p.add_argument('--preset', default=None, type=str,
                   help='predefined profile of default segmentation and filter parameters (.csv)')
    p.add_argument('--patch_level', type=int, default=0,
                   help='downsample level at which to patch')
    p.add_argument('--process_list', type=str, default=None,
                   help='name of list of images to process with parameters (.csv)')
    p.add_argument('--rank', type=int, default=0,
                   help='Process rank used to shard slides across parallel jobs')
    p.add_argument('--world_size', type=int, default=1,
                   help='Total number of parallel jobs that split the slide list')
    p.add_argument('--assumed_mpp', type=float, default=0.5,
                   help='Fallback microns-per-pixel to use when slide metadata lacks MPP information (default: 0.5).')
    p.add_argument('--seg_downsample', type=float, default=1.0,
                   help='Optional extra downsample factor (>1) applied before segmentation to speed up single-level slides.')
    return p
|
|
def main(args):
    """Entrypoint for running seg-and-patch from code or CLI.

    Builds output directories, assembles parameter dicts (optionally
    overridden by a preset CSV), and dispatches to seg_and_patch.
    Returns (avg_seg_time, avg_patch_time).
    """
    patch_save_dir = os.path.join(args.save_dir, 'patches')
    mask_save_dir = os.path.join(args.save_dir, 'masks')
    stitch_save_dir = os.path.join(args.save_dir, 'stitches')

    # Process-list CSVs are resolved relative to the save directory.
    process_list = os.path.join(args.save_dir, args.process_list) if args.process_list else None

    print('source: ', args.source)
    print('patch_save_dir: ', patch_save_dir)
    print('mask_save_dir: ', mask_save_dir)
    print('stitch_save_dir: ', stitch_save_dir)

    directories = {
        'source': args.source,
        'save_dir': args.save_dir,
        'patch_save_dir': patch_save_dir,
        'mask_save_dir': mask_save_dir,
        'stitch_save_dir': stitch_save_dir,
    }

    # Create every output directory; the source dir is read-only input.
    for key, val in directories.items():
        print("{} : {}".format(key, val))
        if key not in ['source']:
            os.makedirs(val, exist_ok=True)

    seg_params = {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False,
                  'keep_ids': 'none', 'exclude_ids': 'none', 'seg_downsample': args.seg_downsample}
    filter_params = {'a_t': 1, 'a_h': 1, 'max_n_holes': 100}
    vis_params = {'vis_level': -1, 'line_thickness': 100}
    patch_params = {'use_padding': True, 'contour_fn': 'four_pt'}

    # A preset CSV (first row) overrides any matching keys in the four dicts.
    if args.preset:
        preset_df = pd.read_csv(os.path.join('presets', args.preset))
        for params in (seg_params, filter_params, vis_params, patch_params):
            for key in params:
                if key in preset_df.columns:
                    params[key] = preset_df.loc[0, key]

    parameters = {'seg_params': seg_params,
                  'filter_params': filter_params,
                  'patch_params': patch_params,
                  'vis_params': vis_params}
    print(parameters)

    rank = getattr(args, 'rank', 0)
    world_size = getattr(args, 'world_size', 1)
    print(f"rank: {rank}, world_size: {world_size}")

    seg_times, patch_times = seg_and_patch(**directories, **parameters,
                                           patch_size=args.patch_size, step_size=args.step_size,
                                           seg=args.seg, use_default_params=False, save_mask=True,
                                           stitch=args.stitch,
                                           patch_level=args.patch_level, patch=args.patch,
                                           process_list=process_list, auto_skip=args.auto_skip,
                                           rank=rank, world_size=world_size,
                                           assumed_mpp=args.assumed_mpp)
    return seg_times, patch_times
|
|
|
|
if __name__ == '__main__':
    # Compose the shared (add_help=False) parser under a top-level CLI.
    cli_parser = argparse.ArgumentParser('create_patches', parents=[get_args_parser()])
    main(cli_parser.parse_args())
|
|