phoebehxf commited on
Commit
aff3c6f
·
1 Parent(s): 01050f6
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50) hide show
  1. README.md +5 -4
  2. _utils/attn_utils.py +592 -0
  3. _utils/attn_utils_new.py +610 -0
  4. _utils/config.yaml +15 -0
  5. _utils/example_config.yaml +20 -0
  6. _utils/load_models.py +16 -0
  7. _utils/load_track_data.py +118 -0
  8. _utils/misc_helper.py +37 -0
  9. _utils/seg_eval.py +61 -0
  10. _utils/track_args.py +157 -0
  11. app.py +1638 -0
  12. config.py +44 -0
  13. counting.py +340 -0
  14. example_imgs/cnt/047cell.png +3 -0
  15. example_imgs/cnt/62_10.png +3 -0
  16. example_imgs/cnt/6800-17000_GTEX-XQ3S_Adipose-Subcutaneous.png +3 -0
  17. example_imgs/seg/003_img.png +3 -0
  18. example_imgs/seg/1-23 [Scan I08].png +3 -0
  19. example_imgs/seg/10X_B2_Tile-15.aligned.png +3 -0
  20. example_imgs/seg/1977_Well_F-5_Field_1.png +3 -0
  21. example_imgs/seg/200972823[5179]_RhoGGG_YAP_TAZ [200972823 Well K6 Field #2].png +3 -0
  22. example_imgs/seg/A172_Phase_C7_1_00d00h00m_1.png +3 -0
  23. example_imgs/seg/JE2NileRed_oilp22_PMP_101220_011_NR.png +3 -0
  24. example_imgs/seg/OpenTest_031.png +3 -0
  25. example_imgs/seg/X_24.png +3 -0
  26. example_imgs/seg/exp_A01_G002_0001.oir.png +3 -0
  27. example_imgs/tra/tracking_test_sequence.zip +3 -0
  28. example_imgs/tra/tracking_test_sequence2.zip +3 -0
  29. inference_count.py +237 -0
  30. inference_seg.py +87 -0
  31. inference_track.py +202 -0
  32. models/.DS_Store +0 -0
  33. models/enc_model/__init__.py +0 -0
  34. models/enc_model/backbone.py +64 -0
  35. models/enc_model/loca.py +232 -0
  36. models/enc_model/loca_args.py +44 -0
  37. models/enc_model/mlp.py +23 -0
  38. models/enc_model/ope.py +245 -0
  39. models/enc_model/positional_encoding.py +30 -0
  40. models/enc_model/regression_head.py +92 -0
  41. models/enc_model/transformer.py +94 -0
  42. models/enc_model/unet_parts.py +77 -0
  43. models/model.py +653 -0
  44. models/seg_post_model/cellpose/__init__.py +1 -0
  45. models/seg_post_model/cellpose/__main__.py +272 -0
  46. models/seg_post_model/cellpose/cli.py +240 -0
  47. models/seg_post_model/cellpose/core.py +322 -0
  48. models/seg_post_model/cellpose/denoise.py +1474 -0
  49. models/seg_post_model/cellpose/dynamics.py +691 -0
  50. models/seg_post_model/cellpose/export.py +405 -0
README.md CHANGED
@@ -1,11 +1,12 @@
1
  ---
2
  title: MicroscopyMatching
3
- emoji: 🐢
4
- colorFrom: green
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 6.3.0
8
  app_file: app.py
 
9
  pinned: false
10
  ---
11
 
 
1
  ---
2
  title: MicroscopyMatching
3
+ emoji: 🚀
4
+ colorFrom: gray
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.49.1
8
  app_file: app.py
9
+ python_version: 3.11
10
  pinned: false
11
  ---
12
 
_utils/attn_utils.py ADDED
@@ -0,0 +1,592 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import abc
2
+
3
+ import cv2
4
+ import numpy as np
5
+ import torch
6
+ from IPython.display import display
7
+ from PIL import Image
8
+ from typing import Union, Tuple, List
9
+ from einops import rearrange, repeat
10
+ import math
11
+ from torch import nn, einsum
12
+ from inspect import isfunction
13
+ from diffusers.utils import logging
14
+ try:
15
+ from diffusers.models.unet_2d_condition import UNet2DConditionOutput
16
+ except:
17
+ from diffusers.models.unets.unet_2d_condition import UNet2DConditionOutput
18
+
19
+ try:
20
+ from diffusers.models.cross_attention import CrossAttention
21
+ except:
22
+ from diffusers.models.attention_processor import Attention as CrossAttention
23
+
24
+ MAX_NUM_WORDS = 77
25
+ LOW_RESOURCE = False
26
+
27
class CountingCrossAttnProcessor1:
    """Attention processor that computes standard (cross-)attention while
    letting an external controller observe the attention probabilities.

    The controller is invoked as ``attnstore(attn, is_cross, place_in_unet)``
    on every call, which is how the surrounding code collects attention maps
    for counting/visualization.
    """

    def __init__(self, attnstore, place_in_unet):
        super().__init__()
        # Controller (e.g. an AttentionStore) that receives each attention map.
        self.attnstore = attnstore
        # Location tag: "down", "mid", or "up".
        self.place_in_unet = place_in_unet

    def __call__(self, attn_layer: CrossAttention, hidden_states, encoder_hidden_states=None, attention_mask=None):
        batch_size, sequence_length, dim = hidden_states.shape
        num_heads = attn_layer.heads

        query = attn_layer.to_q(hidden_states)
        # Cross-attention iff an encoder context is supplied; otherwise self-attention.
        is_cross = encoder_hidden_states is not None
        context = encoder_hidden_states if is_cross else hidden_states
        key = attn_layer.to_k(context)
        value = attn_layer.to_v(context)

        query = self.head_to_batch_dim(query, num_heads)
        key = self.head_to_batch_dim(key, num_heads)
        value = self.head_to_batch_dim(value, num_heads)

        scores = torch.einsum("b i d, b j d -> b i j", query, key) * attn_layer.scale

        if attention_mask is not None:
            attention_mask = attention_mask.reshape(batch_size, -1)
            neg_inf = -torch.finfo(scores.dtype).max
            attention_mask = attention_mask[:, None, :].repeat(num_heads, 1, 1)
            scores.masked_fill_(~attention_mask, neg_inf)

        probs = scores.softmax(dim=-1).clone()
        # Hand the attention map to the controller (may record or edit it).
        self.attnstore(probs, is_cross, self.place_in_unet)

        out = torch.einsum("b i j, b j d -> b i d", probs, value)
        out = self.batch_to_head_dim(out, num_heads)

        # diffusers wraps the output projection in a ModuleList [linear, dropout];
        # older versions expose the linear layer directly.
        if type(attn_layer.to_out) is torch.nn.modules.container.ModuleList:
            projection = attn_layer.to_out[0]
        else:
            projection = attn_layer.to_out

        return projection(out)

    def batch_to_head_dim(self, tensor, head_size):
        """Reshape (batch*heads, seq, dim) back to (batch, seq, heads*dim)."""
        merged_batch, seq_len, dim = tensor.shape
        batch = merged_batch // head_size
        tensor = tensor.reshape(batch, head_size, seq_len, dim)
        return tensor.permute(0, 2, 1, 3).reshape(batch, seq_len, dim * head_size)

    def head_to_batch_dim(self, tensor, head_size, out_dim=3):
        """Reshape (batch, seq, heads*dim) to (batch*heads, seq, dim) when out_dim == 3."""
        batch, seq_len, dim = tensor.shape
        tensor = tensor.reshape(batch, seq_len, head_size, dim // head_size).permute(0, 2, 1, 3)
        if out_dim == 3:
            tensor = tensor.reshape(batch * head_size, seq_len, dim // head_size)
        return tensor
94
+
95
+
96
def register_attention_control(model, controller):
    """Replace every attention processor in ``model.unet`` with a
    ``CountingCrossAttnProcessor1`` wired to ``controller``, then record the
    number of hooked layers on ``controller.num_att_layers``."""
    processors = {}
    hooked = 0
    for name in model.unet.attn_processors.keys():
        # Kept for parity with the upstream recipe; the value itself is unused.
        cross_attention_dim = None if name.endswith("attn1.processor") else model.unet.config.cross_attention_dim
        if name.startswith("mid_block"):
            hidden_size = model.unet.config.block_out_channels[-1]
            place_in_unet = "mid"
        elif name.startswith("up_blocks"):
            block_id = int(name[len("up_blocks.")])
            hidden_size = list(reversed(model.unet.config.block_out_channels))[block_id]
            place_in_unet = "up"
        elif name.startswith("down_blocks"):
            block_id = int(name[len("down_blocks.")])
            hidden_size = model.unet.config.block_out_channels[block_id]
            place_in_unet = "down"
        else:
            # Not an attention location we track.
            continue

        hooked += 1
        processors[name] = CountingCrossAttnProcessor1(
            attnstore=controller, place_in_unet=place_in_unet
        )

    model.unet.set_attn_processor(processors)
    controller.num_att_layers = hooked
126
+
127
def register_hier_output(model):
    """Monkey-patch ``model.unet.forward`` to additionally return the hidden
    states produced by every up-block (hierarchical features).

    The replacement body mirrors diffusers' ``UNet2DConditionModel.forward``;
    the functional differences are the ``out_list`` collection after each
    up-block and the extended return value ``(UNet2DConditionOutput, out_list)``
    when ``return_dict`` is True.

    Fix: removed the unused ``from ldm.modules.diffusionmodules.util import
    checkpoint, timestep_embedding`` — neither name was referenced, and the
    import crashed at call time whenever the ``ldm`` package was absent.
    (The newer copy in ``_utils/attn_utils_new.py`` already omits it.)

    NOTE: the patched forward is a plain closure (no ``self`` parameter);
    callers keep using ``model.unet(...)`` / ``model.unet.forward(...)`` as before.
    """
    self = model.unet
    logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

    def forward(sample, timestep=None, encoder_hidden_states=None, class_labels=None, timestep_cond=None,
                attention_mask=None, cross_attention_kwargs=None, added_cond_kwargs=None,
                down_block_additional_residuals=None, mid_block_additional_residual=None,
                encoder_attention_mask=None, return_dict=True):

        # Collected up-block hidden states (the "hierarchical output").
        out_list = []

        default_overall_up_factor = 2**self.num_upsamplers

        # upsample size should be forwarded when sample is not a multiple of `default_overall_up_factor`
        forward_upsample_size = False
        upsample_size = None

        if any(s % default_overall_up_factor != 0 for s in sample.shape[-2:]):
            logger.info("Forward upsample size to force interpolation output size.")
            forward_upsample_size = True

        if attention_mask is not None:
            # assume that mask is expressed as:
            #   (1 = keep, 0 = discard)
            # convert mask into a bias that can be added to attention scores:
            #   (keep = +0, discard = -10000.0)
            attention_mask = (1 - attention_mask.to(sample.dtype)) * -10000.0
            attention_mask = attention_mask.unsqueeze(1)

        if encoder_attention_mask is not None:
            encoder_attention_mask = (1 - encoder_attention_mask.to(sample.dtype)) * -10000.0
            encoder_attention_mask = encoder_attention_mask.unsqueeze(1)

        # 0. center input if necessary
        if self.config.center_input_sample:
            sample = 2 * sample - 1.0

        # 1. time
        timesteps = timestep
        if not torch.is_tensor(timesteps):
            # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can
            # This would be a good case for the `match` statement (Python 3.10+)
            is_mps = sample.device.type == "mps"
            if isinstance(timestep, float):
                dtype = torch.float32 if is_mps else torch.float64
            else:
                dtype = torch.int32 if is_mps else torch.int64
            timesteps = torch.tensor([timesteps], dtype=dtype, device=sample.device)
        elif len(timesteps.shape) == 0:
            timesteps = timesteps[None].to(sample.device)

        # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
        timesteps = timesteps.expand(sample.shape[0])

        t_emb = self.time_proj(timesteps)
        t_emb = t_emb.to(dtype=sample.dtype)

        emb = self.time_embedding(t_emb, timestep_cond)
        aug_emb = None

        if self.class_embedding is not None:
            if class_labels is None:
                raise ValueError("class_labels should be provided when num_class_embeds > 0")

            if self.config.class_embed_type == "timestep":
                class_labels = self.time_proj(class_labels)
                # `Timesteps` does not contain any weights and will always return f32 tensors
                # there might be better ways to encapsulate this.
                class_labels = class_labels.to(dtype=sample.dtype)

            class_emb = self.class_embedding(class_labels).to(dtype=sample.dtype)

            if self.config.class_embeddings_concat:
                emb = torch.cat([emb, class_emb], dim=-1)
            else:
                emb = emb + class_emb

        if self.config.addition_embed_type == "text":
            aug_emb = self.add_embedding(encoder_hidden_states)
        elif self.config.addition_embed_type == "text_image":
            # Kandinsky 2.1 - style
            if "image_embeds" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_image' which requires the keyword argument `image_embeds` to be passed in `added_cond_kwargs`"
                )

            image_embs = added_cond_kwargs.get("image_embeds")
            text_embs = added_cond_kwargs.get("text_embeds", encoder_hidden_states)
            aug_emb = self.add_embedding(text_embs, image_embs)
        elif self.config.addition_embed_type == "text_time":
            # SDXL - style
            if "text_embeds" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`"
                )
            text_embeds = added_cond_kwargs.get("text_embeds")
            if "time_ids" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`"
                )
            time_ids = added_cond_kwargs.get("time_ids")
            time_embeds = self.add_time_proj(time_ids.flatten())
            time_embeds = time_embeds.reshape((text_embeds.shape[0], -1))

            add_embeds = torch.concat([text_embeds, time_embeds], dim=-1)
            add_embeds = add_embeds.to(emb.dtype)
            aug_emb = self.add_embedding(add_embeds)
        elif self.config.addition_embed_type == "image":
            # Kandinsky 2.2 - style
            if "image_embeds" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `addition_embed_type` set to 'image' which requires the keyword argument `image_embeds` to be passed in `added_cond_kwargs`"
                )
            image_embs = added_cond_kwargs.get("image_embeds")
            aug_emb = self.add_embedding(image_embs)
        elif self.config.addition_embed_type == "image_hint":
            # Kandinsky 2.2 - style
            if "image_embeds" not in added_cond_kwargs or "hint" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `addition_embed_type` set to 'image_hint' which requires the keyword arguments `image_embeds` and `hint` to be passed in `added_cond_kwargs`"
                )
            image_embs = added_cond_kwargs.get("image_embeds")
            hint = added_cond_kwargs.get("hint")
            aug_emb, hint = self.add_embedding(image_embs, hint)
            sample = torch.cat([sample, hint], dim=1)

        emb = emb + aug_emb if aug_emb is not None else emb

        if self.time_embed_act is not None:
            emb = self.time_embed_act(emb)

        if self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "text_proj":
            encoder_hidden_states = self.encoder_hid_proj(encoder_hidden_states)
        elif self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "text_image_proj":
            # Kadinsky 2.1 - style
            if "image_embeds" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `encoder_hid_dim_type` set to 'text_image_proj' which requires the keyword argument `image_embeds` to be passed in `added_conditions`"
                )

            image_embeds = added_cond_kwargs.get("image_embeds")
            encoder_hidden_states = self.encoder_hid_proj(encoder_hidden_states, image_embeds)
        elif self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "image_proj":
            # Kandinsky 2.2 - style
            if "image_embeds" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `encoder_hid_dim_type` set to 'image_proj' which requires the keyword argument `image_embeds` to be passed in `added_conditions`"
                )
            image_embeds = added_cond_kwargs.get("image_embeds")
            encoder_hidden_states = self.encoder_hid_proj(image_embeds)

        # 2. pre-process
        sample = self.conv_in(sample)  # 1, 320, 64, 64

        # 2.5 GLIGEN position net
        if cross_attention_kwargs is not None and cross_attention_kwargs.get("gligen", None) is not None:
            cross_attention_kwargs = cross_attention_kwargs.copy()
            gligen_args = cross_attention_kwargs.pop("gligen")
            cross_attention_kwargs["gligen"] = {"objs": self.position_net(**gligen_args)}

        # 3. down
        lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0

        is_controlnet = mid_block_additional_residual is not None and down_block_additional_residuals is not None
        is_adapter = mid_block_additional_residual is None and down_block_additional_residuals is not None

        down_block_res_samples = (sample,)

        for downsample_block in self.down_blocks:
            if hasattr(downsample_block, "has_cross_attention") and downsample_block.has_cross_attention:
                # For t2i-adapter CrossAttnDownBlock2D
                additional_residuals = {}
                if is_adapter and len(down_block_additional_residuals) > 0:
                    additional_residuals["additional_residuals"] = down_block_additional_residuals.pop(0)

                sample, res_samples = downsample_block(
                    hidden_states=sample,
                    temb=emb,
                    encoder_hidden_states=encoder_hidden_states,
                    attention_mask=attention_mask,
                    cross_attention_kwargs=cross_attention_kwargs,
                    encoder_attention_mask=encoder_attention_mask,
                    **additional_residuals,
                )
            else:
                sample, res_samples = downsample_block(hidden_states=sample, temb=emb, scale=lora_scale)

                if is_adapter and len(down_block_additional_residuals) > 0:
                    sample += down_block_additional_residuals.pop(0)

            down_block_res_samples += res_samples

        if is_controlnet:
            new_down_block_res_samples = ()

            for down_block_res_sample, down_block_additional_residual in zip(
                down_block_res_samples, down_block_additional_residuals
            ):
                down_block_res_sample = down_block_res_sample + down_block_additional_residual
                new_down_block_res_samples = new_down_block_res_samples + (down_block_res_sample,)

            down_block_res_samples = new_down_block_res_samples

        # 4. mid
        if self.mid_block is not None:
            sample = self.mid_block(
                sample,
                emb,
                encoder_hidden_states=encoder_hidden_states,
                attention_mask=attention_mask,
                cross_attention_kwargs=cross_attention_kwargs,
                encoder_attention_mask=encoder_attention_mask,
            )
            # To support T2I-Adapter-XL
            if (
                is_adapter
                and len(down_block_additional_residuals) > 0
                and sample.shape == down_block_additional_residuals[0].shape
            ):
                sample += down_block_additional_residuals.pop(0)

        if is_controlnet:
            sample = sample + mid_block_additional_residual

        # 5. up
        for i, upsample_block in enumerate(self.up_blocks):
            is_final_block = i == len(self.up_blocks) - 1

            res_samples = down_block_res_samples[-len(upsample_block.resnets) :]
            down_block_res_samples = down_block_res_samples[: -len(upsample_block.resnets)]

            # if we have not reached the final block and need to forward the
            # upsample size, we do it here
            if not is_final_block and forward_upsample_size:
                upsample_size = down_block_res_samples[-1].shape[2:]

            if hasattr(upsample_block, "has_cross_attention") and upsample_block.has_cross_attention:
                sample = upsample_block(
                    hidden_states=sample,
                    temb=emb,
                    res_hidden_states_tuple=res_samples,
                    encoder_hidden_states=encoder_hidden_states,
                    cross_attention_kwargs=cross_attention_kwargs,
                    upsample_size=upsample_size,
                    attention_mask=attention_mask,
                    encoder_attention_mask=encoder_attention_mask,
                )
            else:
                sample = upsample_block(
                    hidden_states=sample,
                    temb=emb,
                    res_hidden_states_tuple=res_samples,
                    upsample_size=upsample_size,
                    scale=lora_scale,
                )

            # Record every up-block's output (this is the patched-in behavior).
            # if i in [1, 4, 7]:
            out_list.append(sample)

        # 6. post-process
        if self.conv_norm_out:
            sample = self.conv_norm_out(sample)
            sample = self.conv_act(sample)
        sample = self.conv_out(sample)

        if not return_dict:
            return (sample,)

        return UNet2DConditionOutput(sample=sample), out_list

    self.forward = forward
397
+
398
+
399
class AttentionControl(abc.ABC):
    """Abstract base for attention controllers.

    Subclasses implement ``forward`` to observe/edit one attention map.
    ``__call__`` drives the bookkeeping: it counts attention layers, advances
    ``cur_step`` once every registered layer has been seen, and triggers
    ``between_steps`` at that boundary.
    """

    def __init__(self):
        # Diffusion step counter (advances once per full pass over all layers).
        self.cur_step = 0
        # Number of hooked attention layers; set by register_attention_control.
        self.num_att_layers = -1
        # Index of the attention layer currently being processed.
        self.cur_att_layer = 0

    def step_callback(self, x_t):
        """Hook invoked on the latent after each step; identity by default."""
        return x_t

    def between_steps(self):
        """Hook invoked when a full pass over all attention layers completes."""
        return

    @property
    def num_uncond_att_layers(self):
        # Layers belonging to the unconditional branch are skipped by default.
        return 0

    @abc.abstractmethod
    def forward(self, attn, is_cross: bool, place_in_unet: str):
        raise NotImplementedError

    def __call__(self, attn, is_cross: bool, place_in_unet: str):
        if self.cur_att_layer >= self.num_uncond_att_layers:
            if LOW_RESOURCE:
                attn = self.forward(attn, is_cross, place_in_unet)
            else:
                # Batched cond/uncond: only the conditional half is forwarded.
                half = attn.shape[0] // 2
                attn[half:] = self.forward(attn[half:], is_cross, place_in_unet)
        self.cur_att_layer += 1
        if self.cur_att_layer == self.num_att_layers + self.num_uncond_att_layers:
            self.cur_att_layer = 0
            self.cur_step += 1
            self.between_steps()
        return attn

    def reset(self):
        """Rewind the step/layer counters to their initial state."""
        self.cur_step = 0
        self.cur_att_layer = 0
438
+
439
+
440
class EmptyControl(AttentionControl):
    """No-op controller: passes every attention map through untouched."""

    def forward(self, attn, is_cross: bool, place_in_unet: str):
        # Identity — nothing is recorded or modified.
        return attn
444
+
445
+
446
class AttentionStore(AttentionControl):
    """Controller that records attention maps, bucketed by UNet location
    ("down"/"mid"/"up") and attention kind ("cross"/"self").

    Per-step maps accumulate in ``step_store``; at each step boundary they are
    promoted to ``attention_store`` and, optionally, summed into
    ``global_store`` across all steps.
    """

    @staticmethod
    def get_empty_store():
        """Fresh dict with one empty list per (location, kind) bucket."""
        return {"down_cross": [], "mid_cross": [], "up_cross": [],
                "down_self": [], "mid_self": [], "up_self": []}

    def __init__(self, max_size=32, save_global_store=False):
        '''
        Initialize an empty AttentionStore
        :param step_index: used to visualize only a specific step in the diffusion process
        '''
        super(AttentionStore, self).__init__()
        self.save_global_store = save_global_store
        # Maps with more than max_size**2 query positions are not stored.
        self.max_size = max_size
        self.step_store = self.get_empty_store()
        self.attention_store = {}
        self.global_store = {}
        self.curr_step_index = 0

    def forward(self, attn, is_cross: bool, place_in_unet: str):
        bucket = f"{place_in_unet}_{'cross' if is_cross else 'self'}"
        # avoid memory overhead: skip maps from high-resolution layers
        if attn.shape[1] <= self.max_size ** 2:
            self.step_store[bucket].append(attn)
        return attn

    def between_steps(self):
        # Promote this step's maps, accumulate the running global sum if asked,
        # then start a fresh per-step store.
        self.attention_store = self.step_store
        if self.save_global_store:
            with torch.no_grad():
                if len(self.global_store) == 0:
                    self.global_store = self.step_store
                else:
                    for bucket in self.global_store:
                        for i in range(len(self.global_store[bucket])):
                            self.global_store[bucket][i] += self.step_store[bucket][i].detach()
        self.step_store = self.get_empty_store()

    def get_average_attention(self):
        """Return the most recent step's attention maps."""
        return self.attention_store

    def get_average_global_attention(self):
        """Return per-bucket maps averaged over all completed steps."""
        return {bucket: [item / self.cur_step for item in self.global_store[bucket]]
                for bucket in self.attention_store}

    def reset(self):
        """Clear counters and every stored map."""
        super(AttentionStore, self).reset()
        self.step_store = self.get_empty_store()
        self.attention_store = {}
        self.global_store = {}
499
+
500
def aggregate_attention(prompts, attention_store: AttentionStore, res: int, from_where: List[str], is_cross: bool, select: int):
    """Average stored attention maps of resolution ``res`` over heads/layers.

    Collects every map from the requested UNet locations whose spatial size is
    ``res * res``, picks the ``select``-th prompt's slice, and returns the mean
    over all collected maps.
    """
    stored = attention_store.get_average_attention()
    target_pixels = res ** 2
    kind = "cross" if is_cross else "self"
    collected = []
    for location in from_where:
        for item in stored[f"{location}_{kind}"]:
            if item.shape[1] == target_pixels:
                per_prompt = item.reshape(len(prompts), -1, res, res, item.shape[-1])
                collected.append(per_prompt[select])
    stacked = torch.cat(collected, dim=0)
    # Mean over the head/layer axis.
    return stacked.sum(0) / stacked.shape[0]
512
+
513
+
514
def show_cross_attention(tokenizer, prompts, attention_store: AttentionStore, res: int, from_where: List[str], select: int = 0):
    """Visualize the cross-attention map of every token in ``prompts[select]``.

    Each token's (res x res) map is normalized to [0, 255], upscaled to
    256 x 256, captioned with the decoded token, and shown as one tiled image.

    Fix: ``aggregate_attention`` takes ``prompts`` as its first argument; the
    original call omitted it, shifting every argument one position and making
    this function unusable.
    """
    tokens = tokenizer.encode(prompts[select])
    decoder = tokenizer.decode
    # BUGFIX: pass `prompts` through to aggregate_attention.
    attention_maps = aggregate_attention(prompts, attention_store, res, from_where, True, select)
    images = []
    for i in range(len(tokens)):
        image = attention_maps[:, :, i]
        # Scale to 0..255 grayscale, replicate to 3 channels.
        image = 255 * image / image.max()
        image = image.unsqueeze(-1).expand(*image.shape, 3)
        image = image.numpy().astype(np.uint8)
        image = np.array(Image.fromarray(image).resize((256, 256)))
        image = text_under_image(image, decoder(int(tokens[i])))
        images.append(image)
    view_images(np.stack(images, axis=0))
528
+
529
+
530
def show_self_attention_comp(attention_store: AttentionStore, res: int, from_where: List[str],
                             max_com=10, select: int = 0, prompts=None):
    """Visualize the top ``max_com`` SVD components of the self-attention map.

    Fix: ``aggregate_attention`` takes ``prompts`` as its first argument; the
    original call omitted it (and had no ``prompts`` in scope at all), so every
    argument bound one position off and the function always crashed. A
    backward-compatible ``prompts=None`` keyword is added; the default assumes
    a single prompt in the batch — confirm against callers.
    """
    if prompts is None:
        prompts = [""]  # single-prompt batch by default
    attention_maps = aggregate_attention(prompts, attention_store, res, from_where, False, select).numpy().reshape((res ** 2, res ** 2))
    # SVD of the row-centered attention matrix; rows of vh are components.
    u, s, vh = np.linalg.svd(attention_maps - np.mean(attention_maps, axis=1, keepdims=True))
    images = []
    for i in range(max_com):
        image = vh[i].reshape(res, res)
        # Normalize each component to 0..255 grayscale, upscale for display.
        image = image - image.min()
        image = 255 * image / image.max()
        image = np.repeat(np.expand_dims(image, axis=2), 3, axis=2).astype(np.uint8)
        image = Image.fromarray(image).resize((256, 256))
        image = np.array(image)
        images.append(image)
    view_images(np.concatenate(images, axis=1))
544
+
545
def text_under_image(image: np.ndarray, text: str, text_color: Tuple[int, int, int] = (0, 0, 0)):
    """Return a copy of ``image`` with ``text`` centered on a white strip
    appended below it (strip height = 20% of the image height)."""
    h, w, c = image.shape
    strip = int(h * .2)
    canvas = np.ones((h + strip, w, c), dtype=np.uint8) * 255
    canvas[:h] = image
    font = cv2.FONT_HERSHEY_SIMPLEX
    # font = ImageFont.truetype("/usr/share/fonts/truetype/noto/NotoMono-Regular.ttf", font_size)
    text_w, text_h = cv2.getTextSize(text, font, 1, 2)[0]
    x = (w - text_w) // 2
    y = h + strip - text_h // 2
    cv2.putText(canvas, text, (x, y), font, 1, text_color, 2)
    return canvas
556
+
557
+
558
def view_images(images, num_rows=1, offset_ratio=0.02):
    """Tile ``images`` (list of HxWx3 arrays, or a 4-D array, or one image)
    into a grid with ``num_rows`` rows and display it inline (IPython)."""
    if type(images) is list:
        num_empty = len(images) % num_rows
    elif images.ndim == 4:
        num_empty = images.shape[0] % num_rows
    else:
        # A single image becomes a one-element grid.
        images = [images]
        num_empty = 0

    # NOTE(review): padding count mirrors the upstream prompt-to-prompt code;
    # with num_rows=1 it is always 0.
    blank = np.ones(images[0].shape, dtype=np.uint8) * 255
    images = [img.astype(np.uint8) for img in images] + [blank] * num_empty
    num_items = len(images)

    h, w, c = images[0].shape
    offset = int(h * offset_ratio)
    num_cols = num_items // num_rows
    grid = np.ones((h * num_rows + offset * (num_rows - 1),
                    w * num_cols + offset * (num_cols - 1), 3), dtype=np.uint8) * 255
    for row in range(num_rows):
        for col in range(num_cols):
            y0 = row * (h + offset)
            x0 = col * (w + offset)
            grid[y0:y0 + h, x0:x0 + w] = images[row * num_cols + col]

    display(Image.fromarray(grid))
583
+
584
def self_cross_attn(self_attn, cross_attn):
    """Weight per-pixel self-attention maps by the cross-attention map.

    ``self_attn`` has shape [res, res, res*res] and ``cross_attn`` [res, res];
    the cross map is flattened and broadcast over the last axis, then averaged,
    yielding a [1, 1, res, res] saliency map.
    """
    res = self_attn.shape[0]
    assert res == cross_attn.shape[0]
    # Flatten the cross map so it lines up with self_attn's last axis.
    weights = cross_attn.reshape([res * res])
    weighted = weights * self_attn
    return weighted.mean(-1).unsqueeze(0).unsqueeze(0)
_utils/attn_utils_new.py ADDED
@@ -0,0 +1,610 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import abc
2
+
3
+ import cv2
4
+ import numpy as np
5
+ import torch
6
+ from IPython.display import display
7
+ from PIL import Image
8
+ from typing import Union, Tuple, List
9
+ from einops import rearrange, repeat
10
+ import math
11
+ from torch import nn, einsum
12
+ from inspect import isfunction
13
+ from diffusers.utils import logging
14
+ try:
15
+ from diffusers.models.unet_2d_condition import UNet2DConditionOutput
16
+ except:
17
+ from diffusers.models.unets.unet_2d_condition import UNet2DConditionOutput
18
+ try:
19
+ from diffusers.models.cross_attention import CrossAttention
20
+ except:
21
+ from diffusers.models.attention_processor import Attention as CrossAttention
22
+ from typing import Any, Dict, List, Optional, Tuple, Union
23
+ MAX_NUM_WORDS = 77
24
+ LOW_RESOURCE = False
25
+
26
class CountingCrossAttnProcessor1:
    """Attention processor that computes standard (cross-)attention while
    letting an external controller observe the attention probabilities.

    The controller is invoked as ``attnstore(attn, is_cross, place_in_unet)``
    on every call, which is how the surrounding code collects attention maps
    for counting/visualization.
    """

    def __init__(self, attnstore, place_in_unet):
        super().__init__()
        # Controller (e.g. an AttentionStore) that receives each attention map.
        self.attnstore = attnstore
        # Location tag: "down", "mid", or "up".
        self.place_in_unet = place_in_unet

    def __call__(self, attn_layer: CrossAttention, hidden_states, encoder_hidden_states=None, attention_mask=None):
        batch_size, sequence_length, dim = hidden_states.shape
        num_heads = attn_layer.heads

        query = attn_layer.to_q(hidden_states)
        # Cross-attention iff an encoder context is supplied; otherwise self-attention.
        is_cross = encoder_hidden_states is not None
        context = encoder_hidden_states if is_cross else hidden_states
        key = attn_layer.to_k(context)
        value = attn_layer.to_v(context)

        query = self.head_to_batch_dim(query, num_heads)
        key = self.head_to_batch_dim(key, num_heads)
        value = self.head_to_batch_dim(value, num_heads)

        scores = torch.einsum("b i d, b j d -> b i j", query, key) * attn_layer.scale

        if attention_mask is not None:
            attention_mask = attention_mask.reshape(batch_size, -1)
            neg_inf = -torch.finfo(scores.dtype).max
            attention_mask = attention_mask[:, None, :].repeat(num_heads, 1, 1)
            scores.masked_fill_(~attention_mask, neg_inf)

        probs = scores.softmax(dim=-1).clone()
        # Hand the attention map to the controller (may record or edit it).
        self.attnstore(probs, is_cross, self.place_in_unet)

        out = torch.einsum("b i j, b j d -> b i d", probs, value)
        out = self.batch_to_head_dim(out, num_heads)

        # diffusers wraps the output projection in a ModuleList [linear, dropout];
        # older versions expose the linear layer directly.
        if type(attn_layer.to_out) is torch.nn.modules.container.ModuleList:
            projection = attn_layer.to_out[0]
        else:
            projection = attn_layer.to_out

        return projection(out)

    def batch_to_head_dim(self, tensor, head_size):
        """Reshape (batch*heads, seq, dim) back to (batch, seq, heads*dim)."""
        merged_batch, seq_len, dim = tensor.shape
        batch = merged_batch // head_size
        tensor = tensor.reshape(batch, head_size, seq_len, dim)
        return tensor.permute(0, 2, 1, 3).reshape(batch, seq_len, dim * head_size)

    def head_to_batch_dim(self, tensor, head_size, out_dim=3):
        """Reshape (batch, seq, heads*dim) to (batch*heads, seq, dim) when out_dim == 3."""
        batch, seq_len, dim = tensor.shape
        tensor = tensor.reshape(batch, seq_len, head_size, dim // head_size).permute(0, 2, 1, 3)
        if out_dim == 3:
            tensor = tensor.reshape(batch * head_size, seq_len, dim // head_size)
        return tensor
93
+
94
+
95
def register_attention_control(model, controller):
    """Replace every attention processor in `model.unet` with a
    CountingCrossAttnProcessor1 wired to `controller`, and record on the
    controller how many attention layers will report to it.

    Args:
        model: pipeline-like object exposing a diffusers `unet`.
        controller: AttentionControl instance; its `num_att_layers`
            attribute is set here.
    """
    attn_procs = {}
    cross_att_count = 0
    for name in model.unet.attn_processors.keys():
        # Map the processor name to its place in the UNet; skip anything
        # that is not a down/mid/up attention module.
        if name.startswith("mid_block"):
            place_in_unet = "mid"
        elif name.startswith("up_blocks"):
            place_in_unet = "up"
        elif name.startswith("down_blocks"):
            place_in_unet = "down"
        else:
            continue

        cross_att_count += 1
        attn_procs[name] = CountingCrossAttnProcessor1(
            attnstore=controller, place_in_unet=place_in_unet
        )

    model.unet.set_attn_processor(attn_procs)
    controller.num_att_layers = cross_att_count
125
+
126
def register_hier_output(model):
    """Monkey-patch `model.unet.forward` so that, besides the usual
    UNet2DConditionOutput, it also returns `out_list`: the hidden states
    produced by every up-block (a hierarchy of decoder features).

    The replacement `forward` is a near-verbatim copy of diffusers'
    UNet2DConditionModel.forward; the functional additions are the
    `out_list` collection in the up path and the extended return value.
    """
    self = model.unet
    # NOTE(review): `logging` here must be diffusers.utils.logging (it has
    # `get_logger`), not the stdlib module — confirm the file's imports.
    logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

    def forward(sample, timestep=None, encoder_hidden_states=None, class_labels=None, timestep_cond=None,
                attention_mask=None, cross_attention_kwargs=None, added_cond_kwargs=None, down_block_additional_residuals=None,
                mid_block_additional_residual=None, encoder_attention_mask=None, return_dict=True):

        out_list = []  # decoder (up-block) hidden states, appended per up block

        default_overall_up_factor = 2**self.num_upsamplers

        # upsample size should be forwarded when sample is not a multiple of `default_overall_up_factor`
        forward_upsample_size = False
        upsample_size = None

        if any(s % default_overall_up_factor != 0 for s in sample.shape[-2:]):
            logger.info("Forward upsample size to force interpolation output size.")
            forward_upsample_size = True

        if attention_mask is not None:
            # assume that mask is expressed as:
            #   (1 = keep,      0 = discard)
            # convert mask into a bias that can be added to attention scores:
            #   (keep = +0,     discard = -10000.0)
            attention_mask = (1 - attention_mask.to(sample.dtype)) * -10000.0
            attention_mask = attention_mask.unsqueeze(1)

        if encoder_attention_mask is not None:
            # Same keep/discard -> additive-bias conversion for the text mask.
            encoder_attention_mask = (1 - encoder_attention_mask.to(sample.dtype)) * -10000.0
            encoder_attention_mask = encoder_attention_mask.unsqueeze(1)

        # 0. center input if necessary
        if self.config.center_input_sample:
            sample = 2 * sample - 1.0

        # 1. time embedding
        timesteps = timestep
        if not torch.is_tensor(timesteps):
            # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can
            # This would be a good case for the `match` statement (Python 3.10+)
            is_mps = sample.device.type == "mps"
            if isinstance(timestep, float):
                dtype = torch.float32 if is_mps else torch.float64
            else:
                dtype = torch.int32 if is_mps else torch.int64
            timesteps = torch.tensor([timesteps], dtype=dtype, device=sample.device)
        elif len(timesteps.shape) == 0:
            timesteps = timesteps[None].to(sample.device)

        # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
        timesteps = timesteps.expand(sample.shape[0])

        t_emb = self.time_proj(timesteps)

        # `Timesteps` always returns f32 — cast to the sample dtype.
        t_emb = t_emb.to(dtype=sample.dtype)

        emb = self.time_embedding(t_emb, timestep_cond)
        aug_emb = None

        if self.class_embedding is not None:
            if class_labels is None:
                raise ValueError("class_labels should be provided when num_class_embeds > 0")

            if self.config.class_embed_type == "timestep":
                class_labels = self.time_proj(class_labels)

                # `Timesteps` does not contain any weights and will always return f32 tensors
                # there might be better ways to encapsulate this.
                class_labels = class_labels.to(dtype=sample.dtype)

            class_emb = self.class_embedding(class_labels).to(dtype=sample.dtype)

            if self.config.class_embeddings_concat:
                emb = torch.cat([emb, class_emb], dim=-1)
            else:
                emb = emb + class_emb

        # 1.5 additional conditioning embeddings (model-family specific)
        if self.config.addition_embed_type == "text":
            aug_emb = self.add_embedding(encoder_hidden_states)
        elif self.config.addition_embed_type == "text_image":
            # Kandinsky 2.1 - style
            if "image_embeds" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_image' which requires the keyword argument `image_embeds` to be passed in `added_cond_kwargs`"
                )

            image_embs = added_cond_kwargs.get("image_embeds")
            text_embs = added_cond_kwargs.get("text_embeds", encoder_hidden_states)
            aug_emb = self.add_embedding(text_embs, image_embs)
        elif self.config.addition_embed_type == "text_time":
            # SDXL - style
            if "text_embeds" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`"
                )
            text_embeds = added_cond_kwargs.get("text_embeds")
            if "time_ids" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`"
                )
            time_ids = added_cond_kwargs.get("time_ids")
            time_embeds = self.add_time_proj(time_ids.flatten())
            time_embeds = time_embeds.reshape((text_embeds.shape[0], -1))

            add_embeds = torch.concat([text_embeds, time_embeds], dim=-1)
            add_embeds = add_embeds.to(emb.dtype)
            aug_emb = self.add_embedding(add_embeds)
        elif self.config.addition_embed_type == "image":
            # Kandinsky 2.2 - style
            if "image_embeds" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `addition_embed_type` set to 'image' which requires the keyword argument `image_embeds` to be passed in `added_cond_kwargs`"
                )
            image_embs = added_cond_kwargs.get("image_embeds")
            aug_emb = self.add_embedding(image_embs)
        elif self.config.addition_embed_type == "image_hint":
            # Kandinsky 2.2 - style
            if "image_embeds" not in added_cond_kwargs or "hint" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `addition_embed_type` set to 'image_hint' which requires the keyword arguments `image_embeds` and `hint` to be passed in `added_cond_kwargs`"
                )
            image_embs = added_cond_kwargs.get("image_embeds")
            hint = added_cond_kwargs.get("hint")
            aug_emb, hint = self.add_embedding(image_embs, hint)
            sample = torch.cat([sample, hint], dim=1)

        emb = emb + aug_emb if aug_emb is not None else emb

        if self.time_embed_act is not None:
            emb = self.time_embed_act(emb)

        # 1.75 optional projection of the text encoder states
        if self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "text_proj":
            encoder_hidden_states = self.encoder_hid_proj(encoder_hidden_states)
        elif self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "text_image_proj":
            # Kadinsky 2.1 - style
            if "image_embeds" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `encoder_hid_dim_type` set to 'text_image_proj' which requires the keyword argument `image_embeds` to be passed in `added_conditions`"
                )

            image_embeds = added_cond_kwargs.get("image_embeds")
            encoder_hidden_states = self.encoder_hid_proj(encoder_hidden_states, image_embeds)
        elif self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "image_proj":
            # Kandinsky 2.2 - style
            if "image_embeds" not in added_cond_kwargs:
                raise ValueError(
                    f"{self.__class__} has the config param `encoder_hid_dim_type` set to 'image_proj' which requires the keyword argument `image_embeds` to be passed in `added_conditions`"
                )
            image_embeds = added_cond_kwargs.get("image_embeds")
            encoder_hidden_states = self.encoder_hid_proj(image_embeds)
        # 2. pre-process
        sample = self.conv_in(sample)  # 1, 320, 64, 64

        # 2.5 GLIGEN position net
        if cross_attention_kwargs is not None and cross_attention_kwargs.get("gligen", None) is not None:
            cross_attention_kwargs = cross_attention_kwargs.copy()
            gligen_args = cross_attention_kwargs.pop("gligen")
            cross_attention_kwargs["gligen"] = {"objs": self.position_net(**gligen_args)}

        # 3. down
        lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0

        is_controlnet = mid_block_additional_residual is not None and down_block_additional_residuals is not None
        is_adapter = mid_block_additional_residual is None and down_block_additional_residuals is not None

        down_block_res_samples = (sample,)

        for downsample_block in self.down_blocks:
            if hasattr(downsample_block, "has_cross_attention") and downsample_block.has_cross_attention:
                # For t2i-adapter CrossAttnDownBlock2D
                additional_residuals = {}
                if is_adapter and len(down_block_additional_residuals) > 0:
                    additional_residuals["additional_residuals"] = down_block_additional_residuals.pop(0)

                sample, res_samples = downsample_block(
                    hidden_states=sample,
                    temb=emb,
                    encoder_hidden_states=encoder_hidden_states,
                    attention_mask=attention_mask,
                    cross_attention_kwargs=cross_attention_kwargs,
                    encoder_attention_mask=encoder_attention_mask,
                    **additional_residuals,
                )
            else:
                sample, res_samples = downsample_block(hidden_states=sample, temb=emb, scale=lora_scale)

                if is_adapter and len(down_block_additional_residuals) > 0:
                    sample += down_block_additional_residuals.pop(0)

            down_block_res_samples += res_samples

        if is_controlnet:
            # Add the ControlNet residuals to the skip connections.
            new_down_block_res_samples = ()

            for down_block_res_sample, down_block_additional_residual in zip(
                down_block_res_samples, down_block_additional_residuals
            ):
                down_block_res_sample = down_block_res_sample + down_block_additional_residual
                new_down_block_res_samples = new_down_block_res_samples + (down_block_res_sample,)

            down_block_res_samples = new_down_block_res_samples

        # 4. mid
        if self.mid_block is not None:
            sample = self.mid_block(
                sample,
                emb,
                encoder_hidden_states=encoder_hidden_states,
                attention_mask=attention_mask,
                cross_attention_kwargs=cross_attention_kwargs,
                encoder_attention_mask=encoder_attention_mask,
            )
            # To support T2I-Adapter-XL
            if (
                is_adapter
                and len(down_block_additional_residuals) > 0
                and sample.shape == down_block_additional_residuals[0].shape
            ):
                sample += down_block_additional_residuals.pop(0)

        if is_controlnet:
            sample = sample + mid_block_additional_residual

        # 5. up
        for i, upsample_block in enumerate(self.up_blocks):
            is_final_block = i == len(self.up_blocks) - 1

            res_samples = down_block_res_samples[-len(upsample_block.resnets) :]
            down_block_res_samples = down_block_res_samples[: -len(upsample_block.resnets)]

            # if we have not reached the final block and need to forward the
            # upsample size, we do it here
            if not is_final_block and forward_upsample_size:
                upsample_size = down_block_res_samples[-1].shape[2:]

            if hasattr(upsample_block, "has_cross_attention") and upsample_block.has_cross_attention:
                sample = upsample_block(
                    hidden_states=sample,
                    temb=emb,
                    res_hidden_states_tuple=res_samples,
                    encoder_hidden_states=encoder_hidden_states,
                    cross_attention_kwargs=cross_attention_kwargs,
                    upsample_size=upsample_size,
                    attention_mask=attention_mask,
                    encoder_attention_mask=encoder_attention_mask,
                )
            else:
                sample = upsample_block(
                    hidden_states=sample,
                    temb=emb,
                    res_hidden_states_tuple=res_samples,
                    upsample_size=upsample_size,
                    scale=lora_scale,
                )

            # Custom addition: keep each up-block's feature map for callers.
            out_list.append(sample)

        # 6. post-process
        if self.conv_norm_out:
            sample = self.conv_norm_out(sample)
            sample = self.conv_act(sample)
        sample = self.conv_out(sample)

        if not return_dict:
            # NOTE(review): this branch drops `out_list`, unlike the default
            # return below — confirm whether that asymmetry is intended.
            return (sample,)

        return UNet2DConditionOutput(sample=sample), out_list

    self.forward = forward
394
+
395
+
396
+
397
+
398
+
399
+
400
+
401
class AttentionControl(abc.ABC):
    """Base class for objects that observe (and possibly edit) attention maps.

    Instances are called once per attention layer; subclasses implement
    `forward` to process each map. Counters track the current layer and
    diffusion step so `between_steps` fires once per step.
    """

    def __init__(self):
        self.cur_step = 0
        self.num_att_layers = -1  # filled in by register_attention_control
        self.cur_att_layer = 0

    def step_callback(self, x_t):
        """Hook for editing the latent between diffusion steps (identity here)."""
        return x_t

    def between_steps(self):
        """Hook invoked after every attention layer of a step has been seen."""
        return

    @property
    def num_uncond_att_layers(self):
        # Number of leading (unconditional) layers to skip; none by default.
        return 0

    @abc.abstractmethod
    def forward(self, attn, is_cross: bool, place_in_unet: str):
        raise NotImplementedError

    def __call__(self, attn, is_cross: bool, place_in_unet: str):
        if self.cur_att_layer >= self.num_uncond_att_layers:
            if LOW_RESOURCE:
                attn = self.forward(attn, is_cross, place_in_unet)
            else:
                # Classifier-free guidance batch: only the conditional half
                # (second half of the batch) is processed, in place.
                half = attn.shape[0] // 2
                attn[half:] = self.forward(attn[half:], is_cross, place_in_unet)
        self.cur_att_layer += 1
        if self.cur_att_layer == self.num_att_layers + self.num_uncond_att_layers:
            # A full pass over the UNet finished: advance to the next step.
            self.cur_att_layer = 0
            self.cur_step += 1
            self.between_steps()
        return attn

    def reset(self):
        """Rewind the step/layer counters for a fresh diffusion run."""
        self.cur_step = 0
        self.cur_att_layer = 0
440
+
441
+
442
class EmptyControl(AttentionControl):
    """No-op controller: records nothing and leaves attention maps untouched."""

    def forward(self, attn, is_cross: bool, place_in_unet: str):
        # Identity pass-through.
        return attn
446
+
447
+
448
class AttentionStore(AttentionControl):
    """Collects attention maps per UNet location ("down"/"mid"/"up") and kind
    ("cross"/"self"), per diffusion step, with an optional running global sum.
    """

    @staticmethod
    def get_empty_store():
        keys = ("down_cross", "mid_cross", "up_cross",
                "down_self", "mid_self", "up_self")
        return {k: [] for k in keys}

    def forward(self, attn, is_cross: bool, place_in_unet: str):
        key = f"{place_in_unet}_{'cross' if is_cross else 'self'}"
        # Only keep maps small enough to avoid blowing up memory.
        if attn.shape[1] <= self.max_size ** 2:
            self.step_store[key].append(attn)
        return attn

    def between_steps(self):
        # Expose the maps of the step that just finished...
        self.attention_store = self.step_store
        if self.save_global_store:
            with torch.no_grad():
                if not self.global_store:
                    # First step: seed the accumulator with this step's maps.
                    self.global_store = self.step_store
                else:
                    # Later steps: accumulate map-by-map (in place).
                    for key, stored in self.global_store.items():
                        for i in range(len(stored)):
                            stored[i] += self.step_store[key][i].detach()
        # ...and start collecting the next step into a fresh store.
        self.step_store = self.get_empty_store()

    def get_average_attention(self):
        """Return the maps recorded during the most recent step."""
        return self.attention_store

    def get_average_global_attention(self):
        """Return the globally accumulated maps averaged over all steps."""
        return {key: [item / self.cur_step for item in self.global_store[key]]
                for key in self.attention_store}

    def reset(self):
        super(AttentionStore, self).reset()
        self.step_store = self.get_empty_store()
        self.attention_store = {}
        self.global_store = {}

    def __init__(self, max_size=32, save_global_store=False):
        '''
        Initialize an empty AttentionStore
        :param max_size: maps wider than max_size**2 tokens are skipped
        :param save_global_store: also accumulate maps across steps
        '''
        super(AttentionStore, self).__init__()
        self.save_global_store = save_global_store
        self.max_size = max_size
        self.step_store = self.get_empty_store()
        self.attention_store = {}
        self.global_store = {}
        self.curr_step_index = 0
501
+
502
def aggregate_attention(prompts, attention_store: AttentionStore, res: int, from_where: List[str], is_cross: bool, select: int):
    """Average the stored attention maps of spatial resolution `res` over the
    requested UNet locations, for the prompt at index `select`.

    Returns a [res, res, tokens] tensor with layers and heads averaged out.
    """
    attn_kind = 'cross' if is_cross else 'self'
    attention_maps = attention_store.get_average_attention()
    num_pixels = res ** 2
    gathered = []
    for location in from_where:
        for item in attention_maps[f"{location}_{attn_kind}"]:
            # Keep only maps at the requested resolution.
            if item.shape[1] == num_pixels:
                per_prompt = item.reshape(len(prompts), -1, res, res, item.shape[-1])
                gathered.append(per_prompt[select])
    stacked = torch.cat(gathered, dim=0)
    return stacked.sum(0) / stacked.shape[0]
514
+
515
def aggregate_attention1(prompts, attention_store: AttentionStore, res: int, from_where: List[str], is_cross: bool, select: int):
    """Variant of aggregate_attention: instead of averaging over every
    collected map, keep only the second matching map (index 1) and average
    over its leading (head) dimension.
    """
    attn_kind = 'cross' if is_cross else 'self'
    attention_maps = attention_store.get_average_attention()
    num_pixels = res ** 2
    gathered = []
    for location in from_where:
        for item in attention_maps[f"{location}_{attn_kind}"]:
            if item.shape[1] == num_pixels:
                per_prompt = item.reshape(len(prompts), -1, res, res, item.shape[-1])
                gathered.append(per_prompt[select])
    # NOTE: assumes at least two maps matched the requested resolution.
    chosen = gathered[1]
    return chosen.sum(0) / chosen.shape[0]
529
+
530
+
531
def show_cross_attention(tokenizer, prompts, attention_store: AttentionStore, res: int, from_where: List[str], select: int = 0):
    """Render one grayscale heat-map per prompt token from the aggregated
    cross-attention and display them side by side.

    Args:
        tokenizer: tokenizer providing `encode`/`decode`.
        prompts: list of prompts in the stored batch.
        attention_store: store holding the collected attention maps.
        res: spatial resolution of the maps to aggregate.
        from_where: UNet locations to aggregate over ("down"/"mid"/"up").
        select: prompt index to visualize.
    """
    tokens = tokenizer.encode(prompts[select])
    decoder = tokenizer.decode
    # BUG FIX: aggregate_attention takes `prompts` as its first argument;
    # the original call omitted it and raised a TypeError.
    attention_maps = aggregate_attention(prompts, attention_store, res, from_where, True, select)
    images = []
    for i in range(len(tokens)):
        image = attention_maps[:, :, i]
        # Normalize to [0, 255] and expand to a 3-channel image.
        image = 255 * image / image.max()
        image = image.unsqueeze(-1).expand(*image.shape, 3)
        image = image.numpy().astype(np.uint8)
        image = np.array(Image.fromarray(image).resize((256, 256)))
        image = text_under_image(image, decoder(int(tokens[i])))
        images.append(image)
    view_images(np.stack(images, axis=0))
545
+
546
+
547
def show_self_attention_comp(attention_store: AttentionStore, res: int, from_where: List[str],
                             max_com=10, select: int = 0, prompts=("",)):
    """Visualize the top SVD components of the aggregated self-attention.

    Args:
        attention_store: store holding the collected attention maps.
        res: spatial resolution of the maps to aggregate.
        from_where: UNet locations to aggregate over ("down"/"mid"/"up").
        max_com: number of leading singular components to display.
        select: prompt index inside the stored batch.
        prompts: prompt list forwarded to `aggregate_attention`. BUG FIX:
            the original call omitted this required first argument and always
            raised a TypeError; it is now a backward-compatible keyword
            parameter defaulting to a single (empty) prompt.
    """
    attention_maps = aggregate_attention(prompts, attention_store, res, from_where, False, select).numpy().reshape((res ** 2, res ** 2))
    # Center the rows, then decompose into principal attention patterns.
    u, s, vh = np.linalg.svd(attention_maps - np.mean(attention_maps, axis=1, keepdims=True))
    images = []
    for i in range(max_com):
        image = vh[i].reshape(res, res)
        # Normalize each component into the displayable [0, 255] range.
        image = image - image.min()
        image = 255 * image / image.max()
        image = np.repeat(np.expand_dims(image, axis=2), 3, axis=2).astype(np.uint8)
        image = Image.fromarray(image).resize((256, 256))
        image = np.array(image)
        images.append(image)
    view_images(np.concatenate(images, axis=1))
561
+
562
def text_under_image(image: np.ndarray, text: str, text_color: Tuple[int, int, int] = (0, 0, 0)):
    """Return a copy of `image` with `text` centered in a white strip
    appended below it."""
    h, w, c = image.shape
    offset = int(h * .2)  # height of the caption strip
    canvas = np.ones((h + offset, w, c), dtype=np.uint8) * 255
    font = cv2.FONT_HERSHEY_SIMPLEX
    canvas[:h] = image
    text_w, text_h = cv2.getTextSize(text, font, 1, 2)[0]
    # Center horizontally; place the baseline inside the strip.
    text_x = (w - text_w) // 2
    text_y = h + offset - text_h // 2
    cv2.putText(canvas, text, (text_x, text_y), font, 1, text_color, 2)
    return canvas
573
+
574
+
575
def view_images(images, num_rows=1, offset_ratio=0.02):
    """Tile `images` into a grid with `num_rows` rows (padding with blank
    frames) and display the result."""
    if type(images) is list:
        num_empty = len(images) % num_rows
    elif images.ndim == 4:
        num_empty = images.shape[0] % num_rows
    else:
        # A single image: wrap it so the grid logic below applies.
        images = [images]
        num_empty = 0

    blank = np.ones(images[0].shape, dtype=np.uint8) * 255
    images = [frame.astype(np.uint8) for frame in images] + [blank] * num_empty
    num_items = len(images)

    h, w, c = images[0].shape
    offset = int(h * offset_ratio)  # gap between tiles, in pixels
    num_cols = num_items // num_rows
    grid = np.ones((h * num_rows + offset * (num_rows - 1),
                    w * num_cols + offset * (num_cols - 1), 3), dtype=np.uint8) * 255
    for row in range(num_rows):
        for col in range(num_cols):
            top = row * (h + offset)
            left = col * (w + offset)
            grid[top: top + h, left: left + w] = images[row * num_cols + col]

    pil_img = Image.fromarray(grid)
    display(pil_img)
600
+
601
def self_cross_attn(self_attn, cross_attn):
    """Modulate a self-attention map by a cross-attention map.

    Args:
        self_attn: tensor of shape [res, res, res*res].
        cross_attn: tensor whose squeezed shape is [res, res].

    Returns:
        Tensor of shape [1, 1, res, res]: the per-pixel self-attention
        weighted by the flattened cross-attention, averaged over targets.
    """
    cross_attn = cross_attn.squeeze()
    res = self_attn.shape[0]
    assert res == cross_attn.shape[-1]
    # Flatten the cross-attention so it broadcasts along the last axis.
    weights = cross_attn.reshape([res * res])
    modulated = weights * self_attn
    return modulated.mean(-1).unsqueeze(0).unsqueeze(0)
_utils/config.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attn_dist_mode: v0
2
+ attn_positional_bias: rope
3
+ attn_positional_bias_n_spatial: 16
4
+ causal_norm: quiet_softmax
5
+ coord_dim: 2
6
+ d_model: 320
7
+ dropout: 0.0
8
+ feat_dim: 7
9
+ feat_embed_per_dim: 8
10
+ nhead: 4
11
+ num_decoder_layers: 6
12
+ num_encoder_layers: 6
13
+ pos_embed_per_dim: 32
14
+ spatial_pos_cutoff: 256
15
+ window: 4
_utils/example_config.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ batch_size: 1
2
+ crop_size:
3
+ - 256
4
+ - 256
5
+ detection_folders:
6
+ - TRA
7
+ dropout: 0.01
8
+ example_images: False # Slow
9
+ input_train:
10
+ - data/ctc/Fluo-N2DL-HeLa/01
11
+ input_val:
12
+ - data/ctc/Fluo-N2DL-HeLa/02
13
+ max_tokens: 2048
14
+ name: example
15
+ ndim: 2
16
+ num_decoder_layers: 5
17
+ num_encoder_layers: 5
18
+ outdir: runs
19
+ distributed: False
20
+ window: 4
_utils/load_models.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from config import RunConfig
2
+ import torch
3
+ from diffusers.pipelines.stable_diffusion import StableDiffusionPipeline
4
+ import torch.nn as nn
5
+
6
def load_stable_diffusion_model(config: RunConfig):
    """Load the Stable Diffusion pipeline selected by `config` onto the CPU.

    `config.sd_2_1` chooses between SD 2.1-base and SD 1.4.
    """
    device = torch.device('cpu')

    if config.sd_2_1:
        repo_id = "stabilityai/stable-diffusion-2-1-base"
    else:
        repo_id = "CompVis/stable-diffusion-v1-4"
    stable = StableDiffusionPipeline.from_pretrained(repo_id).to(device)
    return stable
16
+
_utils/load_track_data.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from glob import glob
3
+ from pathlib import Path
4
+ from natsort import natsorted
5
+ from PIL import Image
6
+ import numpy as np
7
+ import tifffile
8
+ import skimage.io as io
9
+ import torchvision.transforms as T
10
+ import cv2
11
+ from tqdm import tqdm
12
+ from models.tra_post_model.trackastra.utils import normalize_01, normalize
13
+ IMG_SIZE = 512
14
+
15
def _load_tiffs(folder: Path, dtype=None):
    """Load a sequence of tiff files from a folder into a 3D numpy array.

    RGB(A) frames are reduced to grayscale with the ITU-R 601 luma weights
    (alpha is dropped first).

    Raises:
        FileNotFoundError: if the folder contains no ``*.tif`` files
            (BUG FIX: previously crashed with an opaque IndexError).
    """
    files = sorted(folder.glob("*.tif"))
    if not files:
        raise FileNotFoundError(f"No .tif files found in {folder}")
    end_frame = len(files)
    # Probe the first frame (of the *sorted* list — the original probed an
    # unsorted glob) to decide whether grayscale conversion is needed.
    turn_gray = tifffile.imread(files[0]).ndim == 3
    frames = []
    for f in tqdm(
        files[0 : end_frame : 1],
        leave=False,
        desc=f"Loading [0:{end_frame}]",
    ):
        img = tifffile.imread(f).astype(dtype)
        if turn_gray and img.ndim == 3:
            if img.shape[-1] > 3:
                img = img[..., :3]  # drop the alpha channel
            img = (0.299 * img[..., 0] + 0.587 * img[..., 1] + 0.114 * img[..., 2])
        frames.append(img)
    return np.stack(frames)
48
+
49
+
50
def load_track_images(file_dir):
    """Load a tracking sequence of .tif frames found (recursively) under
    `file_dir`, preparing three parallel representations:

    - `imgs`: ImageNet-normalized tensors resized to IMG_SIZE (segmentation input)
    - `imgs_raw`: untouched frames as read from disk
    - `images_stable`: resized tensors shifted to [-0.5, 0.5] (diffusion input)
    - `imgs_` / `imgs_01`: trackastra-normalized stacks (tracking input)

    Returns:
        (imgs, imgs_raw, images_stable, imgs_, imgs_01, height, width)
        where height/width are taken from the last frame processed.
    """

    # suffix_ = [".png", ".tif", ".tiff", ".jpg"]
    def find_tif_dir(root_dir):
        """Recursively collect .tif files, skipping macOS resource folders."""
        tif_files = []
        for dirpath, _, filenames in os.walk(root_dir):
            if '__MACOSX' in dirpath:
                continue
            for f in filenames:
                if f.lower().endswith('.tif'):
                    tif_files.append(os.path.join(dirpath, f))
        return tif_files

    tif_dir = find_tif_dir(file_dir)
    print(f"Found {len(tif_dir)} tif images in {file_dir}")
    print(f"First 5 tif images: {tif_dir[:5]}")
    assert len(tif_dir) > 0, f"No tif images found in {file_dir}"
    images = natsorted(tif_dir)
    imgs = []
    imgs_raw = []
    images_stable = []
    # load images for seg and track
    for img_path in tqdm(images, desc="Loading images"):
        img = tifffile.imread(img_path)
        img_raw = io.imread(img_path)

        if img.dtype == 'uint16':
            # Min-max scale 16-bit data to uint8, then fake an RGB stack.
            img = ((img - img.min()) / (img.max() - img.min() + 1e-6) * 255).astype(np.uint8)
            img = np.stack([img] * 3, axis=-1)
            w, h = img.shape[1], img.shape[0]
        else:
            # 8-bit (or other) frames are re-read via PIL as RGB.
            img = Image.open(img_path).convert("RGB")
            w, h = img.size

        img = T.Compose([
            T.ToTensor(),
            T.Resize((IMG_SIZE, IMG_SIZE)),
        ])(img)

        # [0,1] -> [-0.5, 0.5] for the stable-diffusion branch.
        image_stable = img - 0.5
        img = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(img)


        imgs.append(img)
        imgs_raw.append(img_raw)
        images_stable.append(image_stable)

    # NOTE(review): h/w come from the *last* frame — assumes all frames in
    # the sequence share one size; confirm for mixed-size inputs.
    height = h
    width = w
    imgs = np.stack(imgs, axis=0)
    imgs_raw = np.stack(imgs_raw, axis=0)
    images_stable = np.stack(images_stable, axis=0)

    # track data
    imgs_ = _load_tiffs(Path(file_dir), dtype=np.float32)
    imgs_01 = np.stack([
        normalize_01(_x) for _x in tqdm(imgs_, desc="Normalizing", leave=False)
    ])
    imgs_ = np.stack([
        normalize(_x) for _x in tqdm(imgs_, desc="Normalizing", leave=False)
    ])

    return imgs, imgs_raw, images_stable, imgs_, imgs_01, height, width
114
+
115
if __name__ == "__main__":
    # Smoke test: load a local Cell Tracking Challenge sequence and print shapes.
    file_dir = "data/2D+Time/DIC-C2DH-HeLa/train/DIC-C2DH-HeLa/02"
    imgs, imgs_raw, images_stable, imgs_, imgs_01, height, width = load_track_images(file_dir)
    print(imgs.shape, imgs_raw.shape, images_stable.shape, imgs_.shape, imgs_01.shape, height, width)
_utils/misc_helper.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import random
4
+ import shutil
5
+ from collections.abc import Mapping
6
+ from datetime import datetime
7
+
8
+ import numpy as np
9
+ import torch
10
+ import torch.distributed as dist
11
+
12
+
13
def basicConfig(*args, **kwargs):
    """No-op stand-in for logging.basicConfig; accepts and ignores everything."""
    return


# To prevent duplicate logs, we mask this baseConfig setting
logging.basicConfig = basicConfig
19
+
20
+
21
def create_logger(name, log_file, level=logging.INFO):
    """Build a logger that emits identically formatted records both to
    `log_file` and to the console.

    Args:
        name: logger name (loggers are cached per name by the stdlib).
        log_file: path for the FileHandler output.
        level: minimum level for the logger (INFO by default).
    """
    logger = logging.getLogger(name)
    fmt = logging.Formatter(
        "[%(asctime)s][%(filename)15s][line:%(lineno)4d][%(levelname)8s] %(message)s"
    )
    logger.setLevel(level)
    # File first, console second — both share the same format.
    for handler in (logging.FileHandler(log_file), logging.StreamHandler()):
        handler.setFormatter(fmt)
        logger.addHandler(handler)
    return logger
34
+
35
def get_current_time():
    """Return the current local time formatted as 'YYYYMMDD_HHMMSS'."""
    return datetime.now().strftime("%Y%m%d_%H%M%S")
_utils/seg_eval.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+
4
def iou_torch(inst1, inst2):
    """Intersection-over-union of two masks; NaN when both masks are empty."""
    intersection = torch.logical_and(inst1, inst2).sum().float()
    union = torch.logical_or(inst1, inst2).sum().float()
    if union == 0:
        # Both masks empty: IoU is undefined.
        return torch.tensor(float('nan'))
    return intersection / union
10
+
11
def get_instances_torch(mask):
    """Return one boolean mask per non-background (non-zero) label in `mask`."""
    labels = torch.unique(mask)
    return [(mask == lbl) for lbl in labels if lbl != 0]
15
+
16
def compute_instance_miou(pred_mask, gt_mask):
    """Mean best-match IoU over ground-truth instances.

    Both masks are integer [H, W] label maps (0 = background). For every GT
    instance the best IoU against any predicted instance is taken; the mean
    of those best scores is returned (NaN when `gt_mask` has no instances).
    """
    pred_instances = get_instances_torch(pred_mask)
    gt_instances = get_instances_torch(gt_mask)

    best_scores = []
    for gt_inst in gt_instances:
        best = torch.tensor(0.0).to(pred_mask.device)
        for pred_inst in pred_instances:
            score = iou_torch(pred_inst, gt_inst)
            if score > best:
                best = score
        best_scores.append(best)

    # No GT instances: the metric is undefined.
    if not best_scores:
        return torch.tensor(float('nan'))
    return torch.nanmean(torch.stack(best_scores))
34
+
35
+ from torch import Tensor
36
+
37
+
38
def dice_coeff(input: Tensor, target: Tensor, reduce_batch_first: bool = False, epsilon: float = 1e-6):
    """Dice coefficient averaged over all batches, or over a single mask."""
    assert input.size() == target.size()
    assert input.dim() == 3 or not reduce_batch_first

    # Reduce the spatial dims only, or spatial + batch when requested.
    if input.dim() == 2 or not reduce_batch_first:
        sum_dim = (-1, -2)
    else:
        sum_dim = (-1, -2, -3)

    intersection = 2 * (input * target).sum(dim=sum_dim)
    combined = input.sum(dim=sum_dim) + target.sum(dim=sum_dim)
    # Empty-vs-empty masks count as a perfect match.
    combined = torch.where(combined == 0, intersection, combined)

    dice = (intersection + epsilon) / (combined + epsilon)
    return dice.mean()
51
+
52
+
53
def multiclass_dice_coeff(input: Tensor, target: Tensor, reduce_batch_first: bool = False, epsilon: float = 1e-6):
    """Dice coefficient averaged over all classes (class dim folded into batch)."""
    flat_input = input.flatten(0, 1)
    flat_target = target.flatten(0, 1)
    return dice_coeff(flat_input, flat_target, reduce_batch_first, epsilon)
56
+
57
+
58
def dice_loss(input: Tensor, target: Tensor, multiclass: bool = False):
    """Dice loss in [0, 1]: one minus the (multi-class) Dice coefficient."""
    score_fn = multiclass_dice_coeff if multiclass else dice_coeff
    return 1 - score_fn(input, target, reduce_batch_first=True)
_utils/track_args.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import configargparse
2
+
3
+
4
def _str2bool(value):
    """Parse a boolean CLI/config value strictly.

    argparse's ``type=bool`` treats ANY non-empty string — including
    "False" — as True; this converter parses common spellings instead
    and raises ValueError (reported by argparse) on anything else.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("yes", "true", "t", "1"):
        return True
    if text in ("no", "false", "f", "0"):
        return False
    raise ValueError(f"Boolean value expected, got {value!r}")


def parse_train_args():
    """Parse training arguments for the tracking model.

    Values may come from the command line or from a YAML config file
    (``-c/--config``). Unknown extra arguments are tolerated on purpose
    (``parse_known_args``) so wrapper scripts can pass additional flags.

    Returns
    -------
    argparse.Namespace
        The parsed arguments.
    """
    parser = configargparse.ArgumentParser(
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter,
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        allow_abbrev=False,
    )
    parser.add_argument(
        "-c",
        "--config",
        default="_utils/example_config.yaml",
        is_config_file=True,
        help="config file path",
    )
    parser.add_argument("--device", type=str, choices=["cuda", "cpu"], default="cuda")
    parser.add_argument("-o", "--outdir", type=str, default="runs")
    parser.add_argument("--name", type=str, help="Name to append to timestamp")
    # NOTE: boolean flags use _str2bool so "--timestamp False" actually disables them.
    parser.add_argument("--timestamp", type=_str2bool, default=True)
    parser.add_argument(
        "-m",
        "--model",
        type=str,
        default="",
        help="load this model at start (e.g. to continue training)",
    )
    parser.add_argument(
        "--ndim", type=int, default=2, help="number of spatial dimensions"
    )
    parser.add_argument("-d", "--d_model", type=int, default=256)
    parser.add_argument("-w", "--window", type=int, default=10)
    parser.add_argument("--epochs", type=int, default=100)
    parser.add_argument("--warmup_epochs", type=int, default=10)
    parser.add_argument(
        "--detection_folders",
        type=str,
        nargs="+",
        default=["TRA"],
        help=(
            "Subfolders to search for detections. Defaults to `TRA`, which corresponds"
            " to using only the GT."
        ),
    )
    parser.add_argument("--downscale_temporal", type=int, default=1)
    parser.add_argument("--downscale_spatial", type=int, default=1)
    parser.add_argument("--spatial_pos_cutoff", type=int, default=256)
    parser.add_argument("--from_subfolder", action="store_true")
    parser.add_argument("--num_encoder_layers", type=int, default=6)
    parser.add_argument("--num_decoder_layers", type=int, default=6)
    parser.add_argument("--pos_embed_per_dim", type=int, default=32)
    parser.add_argument("--feat_embed_per_dim", type=int, default=8)
    parser.add_argument("--dropout", type=float, default=0.00)
    parser.add_argument("--num_workers", type=int, default=4)
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--max_tokens", type=int, default=None)
    parser.add_argument("--delta_cutoff", type=int, default=2)
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument(
        "--attn_positional_bias",
        type=str,
        choices=["rope", "bias", "none"],
        default="rope",
    )
    parser.add_argument("--attn_positional_bias_n_spatial", type=int, default=16)
    parser.add_argument("--attn_dist_mode", default="v0")
    parser.add_argument("--mixedp", type=_str2bool, default=True)
    parser.add_argument("--dry", action="store_true")
    parser.add_argument("--profile", action="store_true")
    parser.add_argument(
        "--features",
        type=str,
        choices=[
            "none",
            "regionprops",
            "regionprops2",
            "patch",
            "patch_regionprops",
            "wrfeat",
        ],
        default="wrfeat",
    )
    parser.add_argument(
        "--causal_norm",
        type=str,
        choices=["none", "linear", "softmax", "quiet_softmax"],
        default="quiet_softmax",
    )
    parser.add_argument("--div_upweight", type=float, default=2)

    parser.add_argument("--augment", type=int, default=3)
    parser.add_argument("--tracking_frequency", type=int, default=-1)

    parser.add_argument("--sanity_dist", action="store_true")
    parser.add_argument("--preallocate", type=_str2bool, default=False)
    parser.add_argument("--only_prechecks", action="store_true")
    parser.add_argument(
        "--compress", type=_str2bool, default=True, help="compress dataset"
    )

    parser.add_argument("--seed", type=int, default=None)
    parser.add_argument(
        "--logger",
        type=str,
        default="tensorboard",
        choices=["tensorboard", "wandb", "none"],
    )
    parser.add_argument("--wandb_project", type=str, default="trackastra")
    parser.add_argument(
        "--crop_size",
        type=int,
        nargs="+",
        default=None,
        help="random crop size for augmentation",
    )
    parser.add_argument(
        "--weight_by_ndivs",
        type=_str2bool,
        default=True,
        help="Oversample windows that contain divisions",
    )
    parser.add_argument(
        "--weight_by_dataset",
        type=_str2bool,
        default=False,
        help=(
            "Inversely weight datasets by number of samples (to counter dataset size"
            " imbalance)"
        ),
    )

    # Unknown args are deliberately ignored (wrapper scripts pass extras).
    args, _unknown_args = parser.parse_known_args()

    return args
app.py ADDED
@@ -0,0 +1,1638 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_bbox_annotator import BBoxAnnotator
3
+ from PIL import Image
4
+ import numpy as np
5
+ import torch
6
+ import os
7
+ import shutil
8
+ import time
9
+ import json
10
+ import uuid
11
+ from pathlib import Path
12
+ import tempfile
13
+ import zipfile
14
+ from skimage import measure
15
+ from matplotlib import cm
16
+ from glob import glob
17
+ from natsort import natsorted
18
+ from huggingface_hub import HfApi, upload_file
19
+ # import spaces
20
+
21
+ # ===== 导入三个推理模块 =====
22
+ from inference_seg import load_model as load_seg_model, run as run_seg
23
+ from inference_count import load_model as load_count_model, run as run_count
24
+ from inference_track import load_model as load_track_model, run as run_track
25
+
26
+ HF_TOKEN = os.getenv("HF_TOKEN")
27
+ DATASET_REPO = "phoebe777777/celltool_feedback"
28
+
29
+
30
# ===== Clear Gradio's on-disk upload cache so stale files don't accumulate =====
print("===== clearing cache =====")
cache_path = os.path.expanduser("~/.cache/huggingface/gradio")
if os.path.exists(cache_path):
    try:
        shutil.rmtree(cache_path)
        print("✅ Deleted ~/.cache/huggingface/gradio")
    except OSError:
        # Best-effort cleanup; a locked or partially-removed cache is not
        # fatal. (Previously a bare `except:` also swallowed SystemExit /
        # KeyboardInterrupt — narrowed to filesystem errors.)
        pass
41
+
42
# ===== Global model handles (populated once by load_all_models at startup) =====
# Devices default to CPU here; the load_* functions may return a different
# device (e.g. CUDA) — TODO confirm against inference_* modules.
SEG_MODEL = None
SEG_DEVICE = torch.device("cpu")

COUNT_MODEL = None
COUNT_DEVICE = torch.device("cpu")

TRACK_MODEL = None
TRACK_DEVICE = torch.device("cpu")
51
+
52
def load_all_models():
    """Load the segmentation, counting and tracking models once at startup.

    Populates the module-level ``*_MODEL`` / ``*_DEVICE`` globals.
    """
    global SEG_MODEL, SEG_DEVICE
    global COUNT_MODEL, COUNT_DEVICE
    global TRACK_MODEL, TRACK_DEVICE

    def _banner(text):
        # Framed progress banner, identical to the original console output.
        print("\n" + "=" * 60)
        print(text)
        print("=" * 60)

    _banner("📦 Loading Segmentation Model")
    SEG_MODEL, SEG_DEVICE = load_seg_model(use_box=False)

    _banner("📦 Loading Counting Model")
    COUNT_MODEL, COUNT_DEVICE = load_count_model(use_box=False)

    _banner("📦 Loading Tracking Model")
    TRACK_MODEL, TRACK_DEVICE = load_track_model(use_box=False)

    _banner("✅ All Models Loaded Successfully")
76
+
77
# Load all models eagerly at import time so the first user request is fast.
load_all_models()

# ===== User feedback storage (local fallback directory) =====
DATASET_DIR = Path("solver_cache")
DATASET_DIR.mkdir(parents=True, exist_ok=True)
82
+
83
def save_feedback_to_hf(query_id, feedback_type, feedback_text=None, img_path=None, bboxes=None):
    """Persist one feedback record to the Hugging Face dataset repo.

    Falls back to local JSON storage (``save_feedback``) when no HF token
    is configured or when the upload fails.

    Parameters
    ----------
    query_id : str
        Identifier of the inference request the feedback refers to.
    feedback_type : str
        Category of the feedback (e.g. like/dislike/correction).
    feedback_text : str, optional
        Free-form user comment.
    img_path : str, optional
        Path of the image the feedback refers to.
    bboxes : list, optional
        Bounding boxes drawn by the user (serialized as a string).
    """
    # Without a token we cannot push to the Hub — use local storage.
    if not HF_TOKEN:
        print("⚠️ No HF_TOKEN found, using local storage")
        save_feedback(query_id, feedback_type, feedback_text, img_path, bboxes)
        return

    feedback_data = {
        "query_id": query_id,
        "feedback_type": feedback_type,
        "feedback_text": feedback_text,
        "image_path": img_path,
        "bboxes": str(bboxes),  # stringified so the record stays JSON-safe
        "datetime": time.strftime("%Y-%m-%d %H:%M:%S"),
        "timestamp": time.time()
    }

    # Unique per-record filename (query id + epoch seconds).
    filename = f"feedback_{query_id}_{int(time.time())}.json"
    try:
        api = HfApi()

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(feedback_data, f, indent=2, ensure_ascii=False)

        # BUG FIX: previously every record was uploaded to the literal path
        # "data/(unknown)" (an f-string with no placeholder), overwriting
        # earlier feedback. Upload under the unique filename instead.
        api.upload_file(
            path_or_fileobj=filename,
            path_in_repo=f"data/{filename}",
            repo_id=DATASET_REPO,
            repo_type="dataset",
            token=HF_TOKEN
        )

        print(f"✅ Feedback saved to HF Dataset: {DATASET_REPO}")

    except Exception as e:
        print(f"⚠️ Failed to save to HF Dataset: {e}")
        # Fall back to local storage on any upload/serialization error.
        save_feedback(query_id, feedback_type, feedback_text, img_path, bboxes)
    finally:
        # Always clean up the temp file (previously leaked on failure).
        if os.path.exists(filename):
            os.remove(filename)
129
+
130
+
131
def save_feedback(query_id, feedback_type, feedback_text=None, img_path=None, bboxes=None):
    """Append one user-feedback record to <DATASET_DIR>/<query_id>/feedback.json."""
    record = {
        "query_id": query_id,
        "feedback_type": feedback_type,
        "feedback_text": feedback_text,
        "image": img_path,
        "bboxes": bboxes,
        "datetime": time.strftime("%Y%m%d_%H%M%S")
    }
    feedback_file = DATASET_DIR / query_id / "feedback.json"
    feedback_file.parent.mkdir(parents=True, exist_ok=True)

    # Load any previous records; older files may hold a single dict.
    records = []
    if feedback_file.exists():
        with feedback_file.open("r") as f:
            loaded = json.load(f)
        records = loaded if isinstance(loaded, list) else [loaded]
    records.append(record)

    with feedback_file.open("w") as f:
        json.dump(records, f, indent=4, ensure_ascii=False)
156
+
157
+ # ===== 辅助函数 =====
158
def parse_first_bbox(bboxes):
    """Return the first bounding box as (xmin, ymin, xmax, ymax) floats, or None.

    Accepts dicts with x/y/width/height keys or 4+-element sequences;
    anything else yields None.
    """
    if not bboxes:
        return None
    first = bboxes[0]
    if isinstance(first, dict):
        xmin = float(first.get("x", 0))
        ymin = float(first.get("y", 0))
        xmax = xmin + float(first.get("width", 0))
        ymax = ymin + float(first.get("height", 0))
        return xmin, ymin, xmax, ymax
    if isinstance(first, (list, tuple)) and len(first) >= 4:
        return tuple(float(v) for v in first[:4])
    return None
170
+
171
def parse_bboxes(bboxes):
    """Normalize every box to a [xmin, ymin, xmax, ymax] float list.

    Returns None when the input is empty/None; unrecognized entries are
    silently skipped (so an all-invalid input yields an empty list).
    """
    if not bboxes:
        return None

    parsed = []
    for entry in bboxes:
        if isinstance(entry, dict):
            x0 = float(entry.get("x", 0))
            y0 = float(entry.get("y", 0))
            x1 = x0 + float(entry.get("width", 0))
            y1 = y0 + float(entry.get("height", 0))
            parsed.append([x0, y0, x1, y1])
        elif isinstance(entry, (list, tuple)) and len(entry) >= 4:
            parsed.append([float(entry[0]), float(entry[1]), float(entry[2]), float(entry[3])])

    return parsed
186
+
187
def colorize_mask(mask: np.ndarray, num_colors: int = 512) -> np.ndarray:
    """Map an integer instance mask to an RGB uint8 image.

    Label 0 stays black; other labels get hues evenly spaced around the
    HSV wheel. Colors repeat every ``num_colors`` labels.
    """
    def _hsv_to_rgb(h, s, v):
        # Classic sector-based HSV -> RGB conversion (same math as before).
        sector = int(h * 6.0)
        frac = h * 6.0 - sector
        sector %= 6
        p = v * (1 - s)
        q = v * (1 - frac * s)
        t = v * (1 - (1 - frac) * s)
        by_sector = [(v, t, p), (q, v, p), (p, v, t), (p, q, v), (t, p, v), (v, p, q)]
        r, g, b = by_sector[sector]
        return int(r * 255), int(g * 255), int(b * 255)

    palette = [(0, 0, 0)]  # background stays black
    for idx in range(1, num_colors):
        hue = (idx % num_colors) / float(num_colors)
        palette.append(_hsv_to_rgb(hue, 1.0, 0.95))

    lut = np.array(palette, dtype=np.uint8)
    return lut[mask % num_colors]
212
+
213
# ===== Segmentation feature =====
# @spaces.GPU
def segment_with_choice(use_box_choice, annot_value):
    """Segmentation handler: overlay each instance in its own color plus a contour.

    Parameters: ``use_box_choice`` is "Yes"/"No" (whether user-drawn boxes
    constrain segmentation); ``annot_value`` is (image_path, bboxes) from
    the BBoxAnnotator widget. Returns (overlay PIL image, path of the raw
    uint16 instance mask saved as .tif), or (None, None) on failure.
    """
    if annot_value is None or len(annot_value) < 1:
        print("❌ No annotation input")
        return None, None

    img_path = annot_value[0]
    bboxes = annot_value[1] if len(annot_value) > 1 else []

    print(f"🖼️ Image path: {img_path}")
    box_array = None
    if use_box_choice == "Yes" and bboxes:
        # Legacy single-box variant kept for reference:
        # box = parse_first_bbox(bboxes)
        # if box:
        #     xmin, ymin, xmax, ymax = map(int, box)
        #     box_array = [[xmin, ymin, xmax, ymax]]
        #     print(f"📦 Using bounding box: {box_array}")
        box = parse_bboxes(bboxes)
        if box:
            box_array = box
            print(f"📦 Using bounding boxes: {box_array}")

    # Run the segmentation model (instance-labeled 2-D mask expected).
    try:
        mask = run_seg(SEG_MODEL, img_path, box=box_array, device=SEG_DEVICE)
        print("📏 mask shape:", mask.shape, "dtype:", mask.dtype, "unique:", np.unique(mask))
    except Exception as e:
        print(f"❌ Inference failed: {str(e)}")
        return None, None

    # Save the raw mask as a uint16 TIF so the user can download it.
    temp_mask_file = tempfile.NamedTemporaryFile(delete=False, suffix=".tif")
    mask_img = Image.fromarray(mask.astype(np.uint16))
    mask_img.save(temp_mask_file.name)
    print(f"💾 Original mask saved to: {temp_mask_file.name}")

    # Load the original image for the visualization overlay.
    try:
        img = Image.open(img_path)
        print("📷 Image mode:", img.mode, "size:", img.size)
    except Exception as e:
        print(f"❌ Failed to open image: {e}")
        return None, None

    try:
        # Resize the image to the mask resolution; mask.shape[::-1] gives (W, H).
        img_rgb = img.convert("RGB").resize(mask.shape[::-1], resample=Image.BILINEAR)
        img_np = np.array(img_rgb, dtype=np.float32)
        if img_np.max() > 1.5:
            img_np = img_np / 255.0
    except Exception as e:
        print(f"❌ Error in image conversion/resizing: {e}")
        return None, None

    mask_np = np.array(mask)
    inst_mask = mask_np.astype(np.int32)
    unique_ids = np.unique(inst_mask)
    num_instances = len(unique_ids[unique_ids != 0])
    print(f"✅ Instance IDs found: {unique_ids}, Total instances: {num_instances}")

    if num_instances == 0:
        print("⚠️ No instance found, returning dummy red image")
        return Image.new("RGB", mask.shape[::-1], (255, 0, 0)), None

    # ==== Color overlay (one color per instance) ====
    overlay = img_np.copy()
    alpha = 0.5
    # cmap = cm.get_cmap("hsv", num_instances + 1)

    for inst_id in np.unique(inst_mask):
        if inst_id == 0:
            continue
        binary_mask = (inst_mask == inst_id).astype(np.uint8)
        # color = np.array(cmap(inst_id / (num_instances + 1))[:3])  # RGB only, ignore alpha
        color = get_well_spaced_color(inst_id)
        overlay[binary_mask == 1] = (1 - alpha) * overlay[binary_mask == 1] + alpha * color

        # Draw instance contours on top of the blend.
        contours = measure.find_contours(binary_mask, 0.5)
        for contour in contours:
            contour = contour.astype(np.int32)
            # Clamp contour coordinates to the image bounds.
            valid_y = np.clip(contour[:, 0], 0, overlay.shape[0] - 1)
            valid_x = np.clip(contour[:, 1], 0, overlay.shape[1] - 1)
            overlay[valid_y, valid_x] = [1.0, 1.0, 0.0]  # yellow contour

    overlay = np.clip(overlay * 255.0, 0, 255).astype(np.uint8)

    return Image.fromarray(overlay), temp_mask_file.name
304
+
305
# ===== Counting feature =====
# @spaces.GPU
def count_cells_handler(use_box_choice, annot_value):
    """Counting handler with optional bounding boxes.

    Parameters: ``use_box_choice`` is "Yes"/"No"; ``annot_value`` is
    (image_path, bboxes) from the BBoxAnnotator widget.

    Returns a 3-tuple (overlay PIL image, density-map .npy path, message).
    BUG FIX: every error path previously returned only two values, which
    breaks a Gradio binding expecting three outputs — all paths now return
    three.
    """
    if annot_value is None or len(annot_value) < 1:
        return None, None, "⚠️ Please provide an image."

    image_path = annot_value[0]
    bboxes = annot_value[1] if len(annot_value) > 1 else []

    print(f"🖼️ Image path: {image_path}")
    box_array = None
    if use_box_choice == "Yes" and bboxes:
        parsed = parse_bboxes(bboxes)
        if parsed:
            box_array = parsed
            print(f"📦 Using bounding boxes: {box_array}")

    try:
        print(f"🔢 Counting - Image: {image_path}")

        result = run_count(
            COUNT_MODEL,
            image_path,
            box=box_array,
            device=COUNT_DEVICE,
            visualize=True
        )

        if 'error' in result:
            return None, None, f"❌ Counting failed: {result['error']}"

        count = result['count']
        density_map = result['density_map']

        # Persist the raw density map so the user can download it.
        temp_density_file = tempfile.NamedTemporaryFile(delete=False, suffix=".npy")
        np.save(temp_density_file.name, density_map)
        print(f"💾 Density map saved to {temp_density_file.name}")

        try:
            img = Image.open(image_path)
            print("📷 Image mode:", img.mode, "size:", img.size)
        except Exception as e:
            print(f"❌ Failed to open image: {e}")
            return None, None, f"❌ Counting failed: {e}"

        try:
            # Resize image to the density-map resolution; shape[::-1] -> (W, H).
            img_rgb = img.convert("RGB").resize(density_map.shape[::-1], resample=Image.BILINEAR)
            img_np = np.array(img_rgb, dtype=np.float32)
            # Min-max normalize to [0, 1] for blending.
            img_np = (img_np - img_np.min()) / (img_np.max() - img_np.min() + 1e-8)
        except Exception as e:
            print(f"❌ Error in image conversion/resizing: {e}")
            return None, None, f"❌ Counting failed: {e}"

        # Normalize the density map to [0, 1] for colorization.
        density_normalized = density_map.copy()
        if density_normalized.max() > 0:
            density_normalized = (density_normalized - density_normalized.min()) / (
                density_normalized.max() - density_normalized.min()
            )

        # Apply a jet colormap and blend only where density is significant
        # (> 1% of the maximum) to keep the background visible.
        cmap = cm.get_cmap("jet")
        alpha = 0.3
        density_colored = cmap(density_normalized)[:, :, :3]  # drop the alpha channel

        overlay = img_np.copy()
        significant_mask = density_normalized > 0.01
        overlay[significant_mask] = (
            (1 - alpha) * overlay[significant_mask] + alpha * density_colored[significant_mask]
        )
        overlay = np.clip(overlay * 255.0, 0, 255).astype(np.uint8)

        result_text = f"✅ Detected {round(count)} objects"
        # BUG FIX: previously referenced `box`, which is unbound when no
        # boxes were drawn (NameError); report the parsed boxes instead.
        if use_box_choice == "Yes" and box_array:
            result_text += f"\n📦 Using bounding box: {box_array}"

        print(f"✅ Counting done - Count: {count:.1f}")

        return Image.fromarray(overlay), temp_density_file.name, result_text

    except Exception as e:
        print(f"❌ Counting error: {e}")
        import traceback
        traceback.print_exc()
        return None, None, f"❌ Counting failed: {str(e)}"
410
+
411
+ # ===== Tracking Functionality =====
412
def find_tif_dir(root_dir):
    """Return the first directory under ``root_dir`` containing .tif files.

    macOS '__MACOSX' metadata directories are skipped; None when nothing
    is found.
    """
    for current_dir, _subdirs, files in os.walk(root_dir):
        if '__MACOSX' in current_dir:
            continue
        has_tif = any(name.lower().endswith('.tif') for name in files)
        if has_tif:
            return current_dir
    return None
420
+
421
def is_valid_tiff(filepath):
    """Return True when ``filepath`` opens and verifies as an image via PIL."""
    try:
        with Image.open(filepath) as handle:
            handle.verify()
    except Exception:
        # Any open/verify failure means the file is not a usable image.
        return False
    return True
429
+
430
def find_valid_tif_dir(root_dir):
    """Return the first directory under ``root_dir`` with at least one valid TIFF.

    Skips macOS metadata ('__MACOSX' directories, '._' resource forks) and
    any file that fails PIL verification; None when nothing qualifies.
    """
    for current_dir, _subdirs, files in os.walk(root_dir):
        if '__MACOSX' in current_dir:
            continue

        candidates = [
            os.path.join(current_dir, name)
            for name in files
            if name.lower().endswith(('.tif', '.tiff')) and not name.startswith('._')
        ]
        if not candidates:
            continue

        verified = [path for path in candidates if is_valid_tiff(path)]
        if verified:
            print(f"✅ Found {len(verified)} valid TIFF files in: {current_dir}")
            return current_dir

    return None
452
+
453
def create_ctc_results_zip(output_dir):
    """Bundle CTC-format tracking results into a downloadable ZIP.

    Parameters
    ----------
    output_dir : str
        Directory containing tracking results (res_track.txt, masks, ...).

    Returns
    -------
    str
        Path to the created ZIP file (in a fresh temp directory).
    """
    staging_dir = tempfile.mkdtemp()
    zip_name = f"tracking_results_{time.strftime('%Y%m%d_%H%M%S')}.zip"
    zip_path = os.path.join(staging_dir, zip_name)

    print(f"📦 Creating results ZIP: {zip_path}")

    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        # Archive every file under output_dir, preserving relative paths.
        for root, dirs, files in os.walk(output_dir):
            for name in files:
                abs_path = os.path.join(root, name)
                rel_path = os.path.relpath(abs_path, output_dir)
                archive.write(abs_path, rel_path)
                print(f" 📄 Added: {rel_path}")

        # Include a short README describing the contents.
        readme_content = f"""Tracking Results Summary
========================

Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}

Files:
------
- res_track.txt: CTC format tracking data
Format: track_id start_frame end_frame parent_id

- Segmentation masks

For more information on CTC format:
http://celltrackingchallenge.net/
"""
        archive.writestr("README.txt", readme_content)

    print(f"✅ ZIP created: {zip_path} ({os.path.getsize(zip_path) / 1024:.1f} KB)")
    return zip_path
504
+
505
+ # 使用更智能的颜色分配 - 让相邻的ID颜色差异更大
506
def get_well_spaced_color(track_id, num_colors=256):
    """Deterministic, well-spaced RGB color for a track id.

    Hues follow the golden-ratio sequence so consecutive ids land far
    apart on the color wheel; saturation/value are fixed high for
    contrast. Returns an RGB float array with components in [0, 1].
    (``num_colors`` is retained for interface compatibility; unused.)
    """
    import colorsys
    GOLDEN_RATIO = 0.618033988749895
    hue = (track_id * GOLDEN_RATIO) % 1.0
    return np.array(colorsys.hsv_to_rgb(hue, 0.9, 0.95))
516
+
517
+
518
def extract_first_frame(tif_dir):
    """Return the path of the first valid TIF frame in ``tif_dir``, or None.

    Frames are considered in natural sort order; macOS '._' resource forks
    and files failing TIFF verification are skipped.
    """
    candidates = natsorted(
        glob(os.path.join(tif_dir, "*.tif")) + glob(os.path.join(tif_dir, "*.tiff"))
    )
    for path in candidates:
        if os.path.basename(path).startswith('._'):
            continue
        if is_valid_tiff(path):
            return path
    return None
535
+
536
def create_tracking_visualization(tif_dir, output_dir, valid_tif_files):
    """
    Create an animated GIF showing tracked objects with consistent colors.

    Parameters:
    -----------
    tif_dir : str
        Directory containing input TIF frames (currently unused; frames
        come from ``valid_tif_files``).
    output_dir : str
        Directory containing tracking results (masks)
    valid_tif_files : list
        List of valid TIF file paths

    Returns:
    --------
    video_path : str
        Path to generated visualization (GIF, or a static frame fallback)
    """
    import numpy as np
    from skimage import measure
    import tifffile

    # Look for tracking mask files in the output directory.
    # Common CTC formats: man_track*.tif, mask*.tif, or numbered masks.
    # BUG FIX: "*.tif" also matches the first two patterns, so the naive
    # concatenation contained duplicates and misaligned frame/mask pairing;
    # dedupe (order-preserving) before sorting.
    mask_candidates = (glob(os.path.join(output_dir, "mask*.tif")) +
                       glob(os.path.join(output_dir, "man_track*.tif")) +
                       glob(os.path.join(output_dir, "*.tif")))
    mask_files = natsorted(dict.fromkeys(mask_candidates))

    if not mask_files:
        print("⚠️ No mask files found in output directory")
        # Return first frame as fallback
        return valid_tif_files[0]

    print(f"📊 Found {len(mask_files)} mask files")

    frames = []
    alpha = 0.3  # Transparency for overlay

    # Process each (frame, mask) pair.
    num_frames = min(len(valid_tif_files), len(mask_files))
    for i in range(num_frames):
        try:
            # Load original image using tifffile (handles ZSTD compression)
            try:
                img_np = tifffile.imread(valid_tif_files[i])

                # Normalize to [0, 1] range based on actual data type and values
                if img_np.dtype == np.uint8:
                    img_np = img_np.astype(np.float32) / 255.0
                elif img_np.dtype == np.uint16:
                    # Normalize uint16 to [0, 1] using actual min/max
                    img_min, img_max = img_np.min(), img_np.max()
                    if img_max > img_min:
                        img_np = (img_np.astype(np.float32) - img_min) / (img_max - img_min)
                    else:
                        img_np = img_np.astype(np.float32) / 65535.0
                else:
                    # For float or other types, normalize based on actual range
                    img_np = img_np.astype(np.float32)
                    img_min, img_max = img_np.min(), img_np.max()
                    if img_max > img_min:
                        img_np = (img_np - img_min) / (img_max - img_min)
                    else:
                        img_np = np.clip(img_np, 0, 1)

                # Convert to RGB if grayscale
                if img_np.ndim == 2:
                    img_np = np.stack([img_np]*3, axis=-1)
                img_np = img_np.astype(np.float32)
                if img_np.max() > 1.5:
                    img_np = img_np / 255.0
            except Exception as e:
                print(f"⚠️ Error loading image frame {i}: {e}")
                # Fallback to PIL
                img = Image.open(valid_tif_files[i]).convert("RGB")
                img_np = np.array(img, dtype=np.float32) / 255.0

            # Load tracking mask using tifffile (handles ZSTD compression)
            try:
                mask = tifffile.imread(mask_files[i])
            except Exception as e:
                print(f"⚠️ Error loading mask frame {i}: {e}")
                # Fallback to PIL
                mask = np.array(Image.open(mask_files[i]))

            # Resize mask to match image if needed (nearest-neighbor keeps labels).
            if mask.shape[:2] != img_np.shape[:2]:
                from scipy.ndimage import zoom
                zoom_factors = [img_np.shape[0] / mask.shape[0], img_np.shape[1] / mask.shape[1]]
                mask = zoom(mask, zoom_factors, order=0).astype(mask.dtype)

            # Create overlay
            overlay = img_np.copy()

            # Get unique track IDs (excluding background 0)
            track_ids = np.unique(mask)
            track_ids = track_ids[track_ids != 0]

            # Color each tracked object with a color consistent across frames.
            for track_id in track_ids:
                binary_mask = (mask == track_id)
                color = get_well_spaced_color(int(track_id))
                overlay[binary_mask] = (1 - alpha) * overlay[binary_mask] + alpha * color

                # Draw contours (optional, adds yellow boundaries)
                try:
                    contours = measure.find_contours(binary_mask.astype(np.uint8), 0.5)
                    for contour in contours:
                        contour = contour.astype(np.int32)
                        valid_y = np.clip(contour[:, 0], 0, overlay.shape[0] - 1)
                        valid_x = np.clip(contour[:, 1], 0, overlay.shape[1] - 1)
                        overlay[valid_y, valid_x] = [1.0, 1.0, 0.0]  # Yellow contour
                except Exception:
                    pass  # Skip contours if they fail

            # Convert to uint8
            overlay_uint8 = np.clip(overlay * 255.0, 0, 255).astype(np.uint8)
            frames.append(Image.fromarray(overlay_uint8))

            if i % 10 == 0 or i == num_frames - 1:
                print(f" 📸 Processed frame {i+1}/{num_frames}")

        except Exception as e:
            print(f"⚠️ Error processing frame {i}: {e}")
            import traceback
            traceback.print_exc()
            continue

    if not frames:
        print("⚠️ No frames were processed successfully")
        return valid_tif_files[0]

    # Save as animated GIF
    try:
        temp_gif = tempfile.NamedTemporaryFile(delete=False, suffix=".gif")
        frames[0].save(
            temp_gif.name,
            save_all=True,
            append_images=frames[1:],
            duration=200,  # 200ms per frame = 5fps
            loop=0
        )
        temp_gif.close()  # Close the file handle
        print(f"✅ Created tracking visualization GIF: {temp_gif.name}")
        print(f" Size: {os.path.getsize(temp_gif.name)} bytes, Frames: {len(frames)}")
        return temp_gif.name
    except Exception as e:
        print(f"⚠️ Failed to create GIF: {e}")
        import traceback
        traceback.print_exc()
        # Return first frame as static image fallback
        try:
            temp_img = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
            frames[0].save(temp_img.name)
            temp_img.close()
            return temp_img.name
        except Exception:
            return valid_tif_files[0]
709
+
710
# @spaces.GPU
def track_video_handler(use_box_choice, first_frame_annot, zip_file_obj):
    """Run cell tracking on an uploaded ZIP of TIF frames.

    Parameters
    ----------
    use_box_choice : str
        "Yes" or "No" - whether to seed tracking with first-frame boxes.
    first_frame_annot : tuple or None
        (image_path, bboxes) from BBoxAnnotator; only consulted when the
        user selected "Yes" and actually drew boxes on the first frame.
    zip_file_obj : File
        Uploaded ZIP file containing the TIF sequence.

    Returns
    -------
    tuple
        (results_zip_path, status_text, download_visibility_update,
        tracking_visualization) matching the Gradio output components.
    """
    if zip_file_obj is None:
        return None, "⚠️ 请上传包含视频帧的压缩包 (.zip)", None, None

    temp_dir = None
    output_temp_dir = None

    try:
        # Parse bounding boxes if provided by the annotator.
        box_array = None
        if use_box_choice == "Yes" and first_frame_annot is not None:
            if isinstance(first_frame_annot, (list, tuple)) and len(first_frame_annot) > 1:
                bboxes = first_frame_annot[1]
                if bboxes:
                    parsed = parse_bboxes(bboxes)
                    if parsed:
                        box_array = parsed
                        print(f"📦 Using bounding boxes: {box_array}")

        # Extract input ZIP into a scratch directory.
        temp_dir = tempfile.mkdtemp()
        print(f"\n📦 Extracting to temporary directory: {temp_dir}")

        with zipfile.ZipFile(zip_file_obj.name, 'r') as zip_ref:
            extracted_count = 0
            skipped_count = 0

            for member in zip_ref.namelist():
                basename = os.path.basename(member)

                # Skip macOS resource-fork metadata and directory entries.
                if ('__MACOSX' in member or
                        basename.startswith('._') or
                        basename.startswith('.DS_Store') or
                        member.endswith('/')):
                    skipped_count += 1
                    continue

                try:
                    zip_ref.extract(member, temp_dir)
                    extracted_count += 1
                    if basename.lower().endswith(('.tif', '.tiff')):
                        print(f"📄 Extracted TIFF: {basename}")
                except Exception as e:
                    print(f"⚠️ Failed to extract {member}: {e}")

        print(f"\n📊 Extracted: {extracted_count} files, Skipped: {skipped_count} files")

        # Locate the directory that actually holds the frame sequence.
        tif_dir = find_valid_tif_dir(temp_dir)
        if tif_dir is None:
            return None, "❌ Did not find valid TIF directory", None, None

        # Keep only readable, non-metadata TIFF files, in natural frame order.
        tif_files = natsorted(glob(os.path.join(tif_dir, "*.tif")) +
                              glob(os.path.join(tif_dir, "*.tiff")))
        valid_tif_files = [f for f in tif_files
                           if not os.path.basename(f).startswith('._') and is_valid_tiff(f)]

        if len(valid_tif_files) == 0:
            return None, "❌ Did not find valid TIF files", None, None

        print(f"📈 Using {len(valid_tif_files)} TIF files")

        # First frame kept as a static fallback if visualization fails.
        first_frame_path = valid_tif_files[0]

        # Temporary output directory for CTC-format results.
        output_temp_dir = tempfile.mkdtemp()
        print(f"💾 CTC-format results will be saved to: {output_temp_dir}")

        # Run tracking with the optional bounding boxes.
        result = run_track(
            TRACK_MODEL,
            video_dir=tif_dir,
            box=box_array,
            device=TRACK_DEVICE,
            output_dir=output_temp_dir
        )

        if 'error' in result:
            return None, f"❌ Tracking failed: {result['error']}", None, None

        # Build an animated visualization of the tracked objects.
        print("\n🎬 Creating tracking visualization...")
        try:
            tracking_video = create_tracking_visualization(
                tif_dir,
                output_temp_dir,
                valid_tif_files
            )
        except Exception as e:
            print(f"⚠️ Failed to create visualization: {e}")
            import traceback
            traceback.print_exc()
            # Fall back to the raw first frame if visualization fails.
            try:
                tracking_video = Image.open(first_frame_path)
            except Exception:
                tracking_video = None

        # Package the CTC results for download.
        try:
            results_zip = create_ctc_results_zip(output_temp_dir)
        except Exception as e:
            print(f"⚠️ Failed to create ZIP: {e}")
            results_zip = None

        bbox_info = ""
        if box_array:
            bbox_info = f"\n🔲 Using bounding box: [{box_array[0][0]}, {box_array[0][1]}, {box_array[0][2]}, {box_array[0][3]}]"

        result_text = f"""✅ Tracking completed!

🖼️ Processed frames: {len(valid_tif_files)}{bbox_info}

📥 Click the button below to download CTC-format results
The results include:
- res_track.txt (CTC-format tracking data)
- Other tracking-related files
- README.txt (Results description)
"""

        # BUG FIX: the original tested `box`, which is unbound whenever the user
        # selected "Yes" but drew no boxes (NameError). Test `box_array` instead.
        if use_box_choice == "Yes" and box_array:
            result_text += f"\n📦 Using bounding box: {box_array}"

        print(f"\n✅ Tracking completed")

        # Remove the input scratch dir; keep output_temp_dir alive for download.
        if temp_dir:
            try:
                shutil.rmtree(temp_dir)
                print(f"🗑️ Cleared input temp directory")
            except Exception:
                pass

        return results_zip, result_text, gr.update(visible=True), tracking_video

    except zipfile.BadZipFile:
        return None, "❌ Not a valid ZIP file", None, None
    except Exception as e:
        import traceback
        traceback.print_exc()

        # Best-effort cleanup of both scratch directories on failure.
        for d in [temp_dir, output_temp_dir]:
            if d:
                try:
                    shutil.rmtree(d)
                except Exception:
                    pass
        return None, f"❌ Tracking failed: {str(e)}", None, None
880
+
881
+
882
+
883
# ===== Example assets =====
# Paths to bundled demo inputs for each tab (segmentation images,
# counting images, and tracking ZIP sequences).
example_images_seg = list(glob("example_imgs/seg/*"))
example_images_cnt = list(glob("example_imgs/cnt/*"))
example_tracking_zips = list(glob("example_imgs/tra/*.zip"))
888
+
889
# ===== Gradio UI =====
with gr.Blocks(
    title="Microscopy Analysis Suite",
    theme=gr.themes.Soft(),
    css="""
    .tabs button {
        font-size: 18px !important;
        font-weight: 600 !important;
        padding: 12px 20px !important;
    }
    .uniform-height {
        height: 500px !important;
        display: flex !important;
        align-items: center !important;
        justify-content: center !important;
    }

    .uniform-height img,
    .uniform-height canvas {
        max-height: 500px !important;
        object-fit: contain !important;
    }

    /* 强制密度图容器和图片高度 */
    #density_map_output {
        height: 500px !important;
    }

    #density_map_output .image-container {
        height: 500px !important;
    }

    #density_map_output img {
        height: 480px !important;
        width: auto !important;
        max-width: 90% !important;
        object-fit: contain !important;
    }
    """
) as demo:
    gr.Markdown(
        """
        # 🔬 Microscopy Image Analysis Suite

        Supporting three key tasks:
        - 🎨 **Segmentation**: Instance segmentation of microscopic objects
        - 🔢 **Counting**: Counting microscopic objects based on density maps
        - 🎬 **Tracking**: Tracking microscopic objects in video sequences
        """
    )

    # Global state shared by all tabs.
    current_query_id = gr.State(str(uuid.uuid4()))
    user_uploaded_examples = gr.State(example_images_seg.copy())  # seg-tab gallery contents

    def submit_user_feedback(query_id, score, comment, annot_val):
        """Persist a rating/comment (plus the annotated input, if any) to HF storage.

        Shared by all three tabs; the original file defined three identical
        copies of this handler.
        """
        try:
            img_path = annot_val[0] if annot_val and len(annot_val) > 0 else None
            bboxes = annot_val[1] if annot_val and len(annot_val) > 1 else []

            save_feedback_to_hf(
                query_id=query_id,
                feedback_type=f"score_{int(score)}",
                feedback_text=comment,
                img_path=img_path,
                bboxes=bboxes
            )
            return "✅ Feedback submitted, thank you!", gr.update(visible=True)
        except Exception as e:
            return f"❌ Submission failed: {str(e)}", gr.update(visible=True)

    with gr.Tabs():
        # ===== Tab 1: Segmentation =====
        with gr.Tab("🎨 Segmentation"):
            gr.Markdown("## Instance Segmentation of Microscopic Objects")
            gr.Markdown(
                """
                **Instructions:**
                1. Upload an image or select an example image (supports various formats: .png, .jpg, .tif)
                2. (Optional) Specify a target object with a bounding box and select "Yes", or click "Run Segmentation" directly
                3. Click "Run Segmentation"
                4. View the segmentation results, download the original predicted mask (.tif format); if needed, click "Clear Selection" to choose a new image

                🤘 Rate and submit feedback to help us improve the model!
                """
            )

            with gr.Row():
                with gr.Column(scale=1):
                    annotator = BBoxAnnotator(
                        label="🖼️ Upload Image (Optional: Provide a Bounding Box)",
                        categories=["cell"],
                    )

                    # Example images gallery.
                    example_gallery = gr.Gallery(
                        label="📁 Example Image Gallery",
                        columns=len(example_images_seg),
                        rows=1,
                        height=120,
                        object_fit="cover",
                        show_download_button=False
                    )

                    with gr.Row():
                        use_box_radio = gr.Radio(
                            choices=["Yes", "No"],
                            value="No",
                            label="🔲 Specify Bounding Box?"
                        )
                    with gr.Row():
                        run_seg_btn = gr.Button("▶️ Run Segmentation", variant="primary", size="lg")
                        seg_clear_btn = gr.Button("🔄 Clear Selection", variant="secondary")

                    # Lets users contribute new example images to the gallery.
                    image_uploader = gr.Image(
                        label="➕ Upload New Example Image to Gallery",
                        type="filepath"
                    )

                with gr.Column(scale=2):
                    seg_output = gr.Image(
                        type="pil",
                        label="📸 Segmentation Result",
                        elem_classes="uniform-height"
                    )
                    download_mask_btn = gr.File(
                        label="📥 Download Original Prediction (.tif format)",
                        visible=True,
                        height=40,
                    )
                    seg_score_slider = gr.Slider(
                        minimum=1,
                        maximum=5,
                        step=1,
                        value=5,
                        label="🌟 Satisfaction Rating (1-5)"
                    )
                    seg_feedback_box = gr.Textbox(
                        placeholder="Please enter your feedback...",
                        lines=2,
                        label="💬 Feedback"
                    )
                    seg_submit_feedback_btn = gr.Button("💾 Submit Feedback", variant="secondary")
                    seg_feedback_status = gr.Textbox(
                        label="✅ Submission Status",
                        lines=1,
                        visible=False
                    )

            # Run segmentation.
            run_seg_btn.click(
                fn=segment_with_choice,
                inputs=[use_box_radio, annotator],
                outputs=[seg_output, download_mask_btn]
            )

            # Clear the annotator.
            seg_clear_btn.click(fn=lambda: None, inputs=None, outputs=annotator)

            # Populate the gallery on page load.
            demo.load(
                fn=lambda: example_images_seg.copy(),
                outputs=example_gallery
            )

            def add_to_gallery(img_path, current_imgs):
                """Append an uploaded image path to the example list (no duplicates)."""
                if not img_path:
                    return current_imgs
                try:
                    if img_path not in current_imgs:
                        current_imgs.append(img_path)
                    return current_imgs
                except Exception:
                    return current_imgs

            image_uploader.change(
                fn=add_to_gallery,
                inputs=[image_uploader, user_uploaded_examples],
                outputs=user_uploaded_examples
            ).then(
                fn=lambda imgs: imgs,
                inputs=user_uploaded_examples,
                outputs=example_gallery
            )

            def load_from_gallery(evt: gr.SelectData, all_imgs):
                """Load the clicked gallery image into the annotator."""
                if evt.index is not None and evt.index < len(all_imgs):
                    return all_imgs[evt.index]
                return None

            example_gallery.select(
                fn=load_from_gallery,
                inputs=user_uploaded_examples,
                outputs=annotator
            )

            seg_submit_feedback_btn.click(
                fn=submit_user_feedback,
                inputs=[current_query_id, seg_score_slider, seg_feedback_box, annotator],
                outputs=[seg_feedback_status, seg_feedback_status]
            )

        # ===== Tab 2: Counting =====
        with gr.Tab("🔢 Counting"):
            gr.Markdown("## Microscopy Object Counting Analysis")
            gr.Markdown(
                """
                **Usage Instructions:**
                1. Upload an image or select an example image (supports multiple formats: .png, .jpg, .tif)
                2. (Optional) Specify a target object with a bounding box and select "Yes", or click "Run Counting" directly
                3. Click "Run Counting"
                4. View the density map, download the original prediction (.npy format); if needed, click "Clear Selection" to choose a new image to run

                🤘 Rate and submit feedback to help us improve the model!
                """
            )

            with gr.Row():
                with gr.Column(scale=1):
                    count_annotator = BBoxAnnotator(
                        label="🖼️ Upload Image (Optional: Provide a Bounding Box)",
                        categories=["cell"],
                    )

                    with gr.Row():
                        count_example_gallery = gr.Gallery(
                            label="📁 Example Image Gallery",
                            columns=len(example_images_cnt),
                            rows=1,
                            object_fit="cover",
                            height=120,
                            value=example_images_cnt.copy(),  # initialize with examples
                            show_download_button=False
                        )

                    with gr.Row():
                        count_use_box_radio = gr.Radio(
                            choices=["Yes", "No"],
                            value="No",
                            label="🔲 Specify Bounding Box?"
                        )

                    with gr.Row():
                        count_btn = gr.Button("▶️ Run Counting", variant="primary", size="lg")
                        count_clear_btn = gr.Button("🔄 Clear Selection", variant="secondary")

                    with gr.Row():
                        count_image_uploader = gr.File(
                            label="➕ Add Example Image to Gallery",
                            file_types=["image"],
                            type="filepath"
                        )

                with gr.Column(scale=2):
                    count_output = gr.Image(
                        label="📸 Density Map",
                        type="filepath",
                        elem_id="density_map_output"
                    )
                    count_status = gr.Textbox(
                        label="📊 Statistics",
                        lines=2
                    )
                    download_density_btn = gr.File(
                        label="📥 Download Original Prediction (.npy format)",
                        visible=True
                    )
                    count_score_slider = gr.Slider(
                        minimum=1,
                        maximum=5,
                        step=1,
                        value=5,
                        label="🌟 Satisfaction Rating (1-5)"
                    )
                    count_feedback_box = gr.Textbox(
                        placeholder="Please enter your feedback...",
                        lines=2,
                        label="💬 Feedback"
                    )
                    count_submit_feedback_btn = gr.Button("💾 Submit Feedback", variant="secondary")
                    count_feedback_status = gr.Textbox(
                        label="✅ Submission Status",
                        lines=1,
                        visible=False
                    )

            # State backing the counting gallery.
            count_user_examples = gr.State(example_images_cnt.copy())

            def add_to_count_gallery(new_img_file, current_imgs):
                """Add an uploaded file to the counting example gallery."""
                if new_img_file is None:
                    return current_imgs, current_imgs
                try:
                    if new_img_file not in current_imgs:
                        current_imgs.append(new_img_file)
                        print(f"✅ Added image to gallery: {new_img_file}")
                except Exception as e:
                    print(f"⚠️ Failed to add image: {e}")
                return current_imgs, current_imgs

            count_image_uploader.upload(
                fn=add_to_count_gallery,
                inputs=[count_image_uploader, count_user_examples],
                outputs=[count_user_examples, count_example_gallery]
            )

            def load_from_count_gallery(evt: gr.SelectData, all_imgs):
                """Load the clicked gallery image into the counting annotator."""
                if evt.index is not None and evt.index < len(all_imgs):
                    selected_img = all_imgs[evt.index]
                    print(f"📸 Loading image from gallery: {selected_img}")
                    return selected_img
                return None

            count_example_gallery.select(
                fn=load_from_count_gallery,
                inputs=count_user_examples,
                outputs=count_annotator
            )

            # Run counting.
            count_btn.click(
                fn=count_cells_handler,
                inputs=[count_use_box_radio, count_annotator],
                outputs=[count_output, download_density_btn, count_status]
            )

            count_clear_btn.click(fn=lambda: None, inputs=None, outputs=count_annotator)

            # BUG FIX: the original bound this click to the *segmentation* tab's
            # `annotator`, so counting feedback captured the wrong image/boxes.
            count_submit_feedback_btn.click(
                fn=submit_user_feedback,
                inputs=[current_query_id, count_score_slider, count_feedback_box, count_annotator],
                outputs=[count_feedback_status, count_feedback_status]
            )

        # ===== Tab 3: Tracking =====
        with gr.Tab("🎬 Tracking"):
            gr.Markdown("## Microscopy Object Video Tracking - Supports ZIP Upload")
            gr.Markdown(
                """
                **Instructions:**
                1. Upload a ZIP file or select from the example library. The ZIP should contain a sequence of TIF images named in chronological order (e.g., t000.tif, t001.tif...)
                2. (Optional) Specify a target object with a bounding box on the first frame and select "Yes", or click "Run Tracking" directly
                3. Click "Run Tracking"
                4. Download the CTC format results; if needed, click "Clear Selection" to choose a new ZIP file to run

                🤘 Rate and submit feedback to help us improve the model!

                """
            )

            with gr.Row():
                with gr.Column(scale=1):
                    track_zip_upload = gr.File(
                        label="📦 Upload Image Sequence in ZIP File",
                        file_types=[".zip"]
                    )

                    # First-frame annotation; revealed once a ZIP is uploaded.
                    track_first_frame_annotator = BBoxAnnotator(
                        label="🖼️ (Optional) First Frame Bounding Box Annotation",
                        categories=["cell"],
                        visible=False,
                    )

                    track_example_gallery = gr.Gallery(
                        label="📁 Example Video Gallery (Click to Select)",
                        columns=10,
                        rows=1,
                        height=120,
                        object_fit="contain",
                        show_download_button=False
                    )

                    with gr.Row():
                        track_use_box_radio = gr.Radio(
                            choices=["Yes", "No"],
                            value="No",
                            label="🔲 Specify Bounding Box?"
                        )

                    with gr.Row():
                        track_btn = gr.Button("▶️ Run Tracking", variant="primary", size="lg")
                        track_clear_btn = gr.Button("🔄 Clear Selection", variant="secondary")

                    track_gallery_upload = gr.File(
                        label="➕ Add ZIP to Example Gallery",
                        file_types=[".zip"],
                        type="filepath"
                    )

                with gr.Column(scale=2):
                    track_first_frame_preview = gr.Image(
                        label="📸 Tracking Visualization",
                        type="filepath",
                        elem_classes="uniform-height",
                        interactive=False
                    )
                    track_output = gr.Textbox(
                        label="📊 Tracking Information",
                        lines=8,
                        interactive=False
                    )
                    track_download = gr.File(
                        label="📥 Download Tracking Results (CTC Format)",
                        visible=False
                    )
                    track_score_slider = gr.Slider(
                        minimum=1,
                        maximum=5,
                        step=1,
                        value=5,
                        label="🌟 Satisfaction Rating (1-5)"
                    )
                    track_feedback_box = gr.Textbox(
                        placeholder="Please enter your feedback...",
                        lines=2,
                        label="💬 Feedback"
                    )
                    track_submit_feedback_btn = gr.Button("💾 Submit Feedback", variant="secondary")
                    track_feedback_status = gr.Textbox(
                        label="✅ Submission Status",
                        lines=1,
                        visible=False
                    )

            # State backing the tracking-example gallery (paths to ZIPs).
            track_user_examples = gr.State(example_tracking_zips.copy())

            def get_zip_preview(zip_path):
                """Extract the first image in a ZIP and save a normalized PNG preview."""
                try:
                    temp_dir = tempfile.mkdtemp()
                    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                        for member in zip_ref.namelist():
                            basename = os.path.basename(member)
                            if ('__MACOSX' not in member and
                                    not basename.startswith('._') and
                                    basename.lower().endswith(('.tif', '.tiff', '.png', '.jpg'))):
                                zip_ref.extract(member, temp_dir)
                                extracted_path = os.path.join(temp_dir, member)

                                import tifffile
                                import numpy as np

                                img_np = tifffile.imread(extracted_path)
                                # Stretch uint16 to uint8 for display.
                                if img_np.dtype == np.uint16:
                                    img_min, img_max = img_np.min(), img_np.max()
                                    if img_max > img_min:
                                        img_np = ((img_np.astype(np.float32) - img_min) / (img_max - img_min) * 255).astype(np.uint8)

                                if img_np.ndim == 2:
                                    img_np = np.stack([img_np] * 3, axis=-1)

                                preview_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
                                Image.fromarray(img_np).save(preview_path.name)
                                return preview_path.name
                except Exception:
                    pass
                return None

            def init_tracking_gallery():
                """Create preview thumbnails for the bundled example ZIPs."""
                previews = []
                for zip_path in example_tracking_zips:
                    if os.path.exists(zip_path):
                        preview = get_zip_preview(zip_path)
                        if preview:
                            previews.append(preview)
                return previews

            # Populate the gallery on page load.
            demo.load(
                fn=init_tracking_gallery,
                outputs=track_example_gallery
            )

            def add_zip_to_gallery(zip_path, current_zips):
                """Add an uploaded ZIP to the example list and refresh previews."""
                # BUG FIX: the original early-exit returned the Gallery *component
                # object* as an output value; return a no-op update instead.
                if not zip_path:
                    return current_zips, gr.update()
                try:
                    if zip_path not in current_zips:
                        current_zips.append(zip_path)
                        print(f"✅ Added ZIP to gallery: {zip_path}")
                    # Regenerate previews for every known ZIP.
                    previews = []
                    for zp in current_zips:
                        preview = get_zip_preview(zp)
                        if preview:
                            previews.append(preview)
                    return current_zips, previews
                except Exception as e:
                    print(f"⚠️ Error: {e}")
                    return current_zips, []

            track_gallery_upload.upload(
                fn=add_zip_to_gallery,
                inputs=[track_gallery_upload, track_user_examples],
                outputs=[track_user_examples, track_example_gallery]
            )

            def load_zip_from_gallery(evt: gr.SelectData, all_zips):
                """Map a gallery click back to its source ZIP path."""
                if evt.index is not None and evt.index < len(all_zips):
                    selected_zip = all_zips[evt.index]
                    print(f"📁 Selected ZIP from gallery: {selected_zip}")
                    return selected_zip
                return None

            track_example_gallery.select(
                fn=load_zip_from_gallery,
                inputs=track_user_examples,
                outputs=track_zip_upload
            )

            def load_first_frame_for_annotation(zip_file_obj):
                """Load and normalize the first frame from the ZIP for annotation."""
                if zip_file_obj is None:
                    return None, gr.update(visible=False)

                import tifffile
                import numpy as np

                try:
                    temp_dir = tempfile.mkdtemp()
                    with zipfile.ZipFile(zip_file_obj.name, 'r') as zip_ref:
                        for member in zip_ref.namelist():
                            basename = os.path.basename(member)
                            if ('__MACOSX' not in member and
                                    not basename.startswith('._') and
                                    basename.lower().endswith(('.tif', '.tiff'))):
                                zip_ref.extract(member, temp_dir)

                    tif_dir = find_valid_tif_dir(temp_dir)
                    if tif_dir:
                        first_frame = extract_first_frame(tif_dir)
                        if first_frame:
                            try:
                                img_np = tifffile.imread(first_frame)

                                # Normalize to [0, 255] uint8 for display.
                                if img_np.dtype == np.uint8:
                                    pass  # already displayable
                                elif img_np.dtype == np.uint16:
                                    img_min, img_max = img_np.min(), img_np.max()
                                    if img_max > img_min:
                                        img_np = ((img_np.astype(np.float32) - img_min) / (img_max - img_min) * 255).astype(np.uint8)
                                    else:
                                        img_np = (img_np.astype(np.float32) / 65535.0 * 255).astype(np.uint8)
                                else:
                                    # Float or other types: min-max stretch.
                                    img_np = img_np.astype(np.float32)
                                    img_min, img_max = img_np.min(), img_np.max()
                                    if img_max > img_min:
                                        img_np = ((img_np - img_min) / (img_max - img_min) * 255).astype(np.uint8)
                                    else:
                                        img_np = np.clip(img_np * 255, 0, 255).astype(np.uint8)

                                # Convert to RGB; drop extra channels.
                                if img_np.ndim == 2:
                                    img_np = np.stack([img_np] * 3, axis=-1)
                                elif img_np.ndim == 3 and img_np.shape[2] > 3:
                                    img_np = img_np[:, :, :3]

                                temp_img = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
                                Image.fromarray(img_np).save(temp_img.name)

                                print(f"✅ Loaded and normalized first frame: {first_frame}")
                                return temp_img.name, gr.update(visible=True)
                            except Exception as e:
                                print(f"⚠️ Error normalizing first frame: {e}")
                                import traceback
                                traceback.print_exc()
                                # Fall back to the original file path.
                                return first_frame, gr.update(visible=True)
                except Exception as e:
                    print(f"⚠️ Error loading first frame: {e}")
                    import traceback
                    traceback.print_exc()
                # BUG FIX: the original fell off the `try` without a return when
                # no TIF dir / first frame was found, yielding a single None
                # instead of the 2-tuple Gradio expects here.
                return None, gr.update(visible=False)

            # NOTE(review): the annotator appears twice in `outputs` (value +
            # visibility update), mirroring the original wiring — verify this is
            # accepted by the installed Gradio version.
            track_zip_upload.change(
                fn=load_first_frame_for_annotation,
                inputs=track_zip_upload,
                outputs=[track_first_frame_annotator, track_first_frame_annotator]
            )

            # Run tracking.
            track_btn.click(
                fn=track_video_handler,
                inputs=[track_use_box_radio, track_first_frame_annotator, track_zip_upload],
                outputs=[track_download, track_output, track_download, track_first_frame_preview]
            )

            track_clear_btn.click(fn=lambda: None, inputs=None, outputs=track_first_frame_annotator)

            # BUG FIX: the original bound this click to the *segmentation* tab's
            # `annotator`, so tracking feedback captured the wrong image/boxes.
            track_submit_feedback_btn.click(
                fn=submit_user_feedback,
                inputs=[current_query_id, track_score_slider, track_feedback_box, track_first_frame_annotator],
                outputs=[track_feedback_status, track_feedback_status]
            )

    gr.Markdown(
        """
        ---
        ### 💡 Technical Details

        **MicroscopyMatching** - A general-purpose microscopy image analysis toolkit based on Stable Diffusion
        """
    )
1631
if __name__ == "__main__":
    # Queue requests and serve on all interfaces at port 7860.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "ssr_mode": False,
        "show_error": True,
    }
    demo.queue().launch(**launch_options)
config.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass, field
2
+ from pathlib import Path
3
+ from typing import Dict, List
4
+
5
+
6
@dataclass
class RunConfig:
    """Configuration for a Stable Diffusion / attend-and-excite generation run.

    Instantiating this dataclass creates ``output_path`` on disk as a side
    effect (see ``__post_init__``).
    """
    # Guiding text prompt
    prompt: str = "<task-prompt>"
    # Whether to use Stable Diffusion v2.1
    sd_2_1: bool = False
    # Which token indices to alter with attend-and-excite
    token_indices: List[int] = field(default_factory=lambda: [2,5])
    # Which random seeds to use when generating
    seeds: List[int] = field(default_factory=lambda: [42])
    # Path to save all outputs to
    output_path: Path = Path('./outputs')
    # Number of denoising steps
    n_inference_steps: int = 50
    # Text guidance scale
    guidance_scale: float = 7.5
    # Number of denoising steps to apply attend-and-excite
    max_iter_to_alter: int = 25
    # Resolution of UNet to compute attention maps over
    attention_res: int = 16
    # Whether to run standard SD or attend-and-excite
    run_standard_sd: bool = False
    # Dictionary defining the iterations and desired thresholds to apply iterative latent refinement in
    thresholds: Dict[int, float] = field(default_factory=lambda: {0: 0.05, 10: 0.5, 20: 0.8})
    # Scale factor for updating the denoised latent z_t
    scale_factor: int = 20
    # Start and end values used for scaling the scale factor - decays linearly with the denoising timestep
    # (pair of floats: (start, end))
    scale_range: tuple = field(default_factory=lambda: (1.0, 0.5))
    # Whether to apply the Gaussian smoothing before computing the maximum attention value for each subject token
    smooth_attentions: bool = True
    # Standard deviation for the Gaussian smoothing
    sigma: float = 0.5
    # Kernel size for the Gaussian smoothing
    kernel_size: int = 3
    # Whether to save cross attention maps for the final results
    save_cross_attention_maps: bool = False

    def __post_init__(self):
        # Ensure the output directory exists before any results are written.
        self.output_path.mkdir(exist_ok=True, parents=True)
counting.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # stable diffusion x loca
2
+ import os
3
+ import pprint
4
+ from typing import Any, List, Optional
5
+ import argparse
6
+ from huggingface_hub import hf_hub_download
7
+ import pyrallis
8
+ from pytorch_lightning.utilities.types import STEP_OUTPUT
9
+ import torch
10
+ import os
11
+ from PIL import Image
12
+ import numpy as np
13
+ from config import RunConfig
14
+ from _utils import attn_utils_new as attn_utils
15
+ from _utils.attn_utils import AttentionStore
16
+ from _utils.misc_helper import *
17
+ import torch.nn.functional as F
18
+ import matplotlib.pyplot as plt
19
+ import cv2
20
+ import warnings
21
+ from pytorch_lightning.callbacks import ModelCheckpoint
22
+ warnings.filterwarnings("ignore", category=UserWarning)
23
+ import pytorch_lightning as pl
24
+ from _utils.load_models import load_stable_diffusion_model
25
+ from models.model import Counting_with_SD_features_loca as Counting
26
+ from pytorch_lightning.loggers import WandbLogger
27
+ from models.enc_model.loca_args import get_argparser as loca_get_argparser
28
+ from models.enc_model.loca import build_model as build_loca_model
29
+ import time
30
+ import torchvision.transforms as T
31
+ import skimage.io as io
32
+
33
+ SCALE = 1
34
+
35
+
36
class CountingModule(pl.LightningModule):
    """Counting pipeline that fuses a LOCA counting backbone with Stable
    Diffusion attention maps.

    An input image is encoded into SD latents, lightly noised, and denoised
    once while an :class:`AttentionStore` captures self/cross-attention.
    The aggregated attention maps are stacked and fed into LOCA's regression
    head to predict a density map whose sum is the object count.
    """

    def __init__(self, use_box=True):
        # use_box: when True, exemplar boxes guide counting (few-shot mode);
        # when False, a text-only (zero-shot) path with dummy boxes is used.
        super().__init__()
        self.use_box = use_box
        self.config = RunConfig()  # config for stable diffusion
        self.initialize_model()

    def initialize_model(self):
        """Build LOCA, the counting adapter, Stable Diffusion (with attention
        hooks), and register the learned ``<task-prompt>`` placeholder token."""

        # load loca model
        loca_args = loca_get_argparser().parse_args()
        self.loca_model = build_loca_model(loca_args)
        # weights = torch.load("ckpt/loca_few_shot.pt")["model"]
        # weights = {k.replace("module","") : v for k, v in weights.items()}
        # self.loca_model.load_state_dict(weights, strict=False)
        # del weights

        self.counting_adapter = Counting(scale_factor=SCALE)
        # if os.path.isfile(self.args.adapter_weight):
        #     adapter_weight = torch.load(self.args.adapter_weight,map_location=torch.device('cpu'))
        #     self.counting_adapter.load_state_dict(adapter_weight, strict=False)

        ### load stable diffusion and its controller
        self.stable = load_stable_diffusion_model(config=self.config)
        self.noise_scheduler = self.stable.scheduler
        # AttentionStore records attention at resolutions up to 64x64.
        self.controller = AttentionStore(max_size=64)
        attn_utils.register_attention_control(self.stable, self.controller)
        attn_utils.register_hier_output(self.stable)

        ##### initialize token_emb #####
        placeholder_token = "<task-prompt>"
        self.task_token = "repetitive objects"
        # Add the placeholder token in tokenizer
        num_added_tokens = self.stable.tokenizer.add_tokens(placeholder_token)
        if num_added_tokens == 0:
            raise ValueError(
                f"The tokenizer already contains the token {placeholder_token}. Please pass a different"
                " `placeholder_token` that is not already in the tokenizer."
            )
        try:
            # Preferred path: initialize the placeholder embedding from a
            # pretrained task embedding hosted on the Hub.
            task_embed_from_pretrain = hf_hub_download(
                repo_id="phoebe777777/111",
                filename="task_embed.pth",
                token=None,
                force_download=False
            )
            placeholder_token_id = self.stable.tokenizer.convert_tokens_to_ids(placeholder_token)
            self.stable.text_encoder.resize_token_embeddings(len(self.stable.tokenizer))

            token_embeds = self.stable.text_encoder.get_input_embeddings().weight.data
            # NOTE(review): hf_hub_download returns a *file path* (str);
            # assigning it into the embedding tensor will raise and fall
            # through to the except branch below. A `torch.load` on the
            # downloaded file appears to be missing here — confirm.
            token_embeds[placeholder_token_id] = task_embed_from_pretrain
        except:
            # Fallback: seed the placeholder with the embedding of "count".
            initializer_token = "count"
            token_ids = self.stable.tokenizer.encode(initializer_token, add_special_tokens=False)
            # Check if initializer_token is a single token or a sequence of tokens
            if len(token_ids) > 1:
                raise ValueError("The initializer token must be a single token.")

            initializer_token_id = token_ids[0]
            placeholder_token_id = self.stable.tokenizer.convert_tokens_to_ids(placeholder_token)

            self.stable.text_encoder.resize_token_embeddings(len(self.stable.tokenizer))

            token_embeds = self.stable.text_encoder.get_input_embeddings().weight.data
            token_embeds[placeholder_token_id] = token_embeds[initializer_token_id]

        # others
        self.placeholder_token = placeholder_token
        self.placeholder_token_id = placeholder_token_id

    def move_to_device(self, device):
        """Move Stable Diffusion, LOCA, the adapter, and this module to `device`."""
        self.stable.to(device)
        if self.loca_model is not None and self.counting_adapter is not None:
            self.loca_model.to(device)
            self.counting_adapter.to(device)
        self.to(device)

    def forward(self, data_path, box=None):
        """Predict a density map and object count for one image.

        Args:
            data_path: path to the input image file.
            box: optional exemplar boxes ``[[x1, y1, x2, y2], ...]`` in
                original image pixel coordinates. Must be given iff the
                module was built with ``use_box=True``.

        Returns:
            (pred_density_rsz, pred_cnt): density map resized back to the
            original image size (renormalized so it sums to the count), and
            the predicted count (a Python float).
        """
        filename = data_path.split("/")[-1]  # NOTE(review): unused below
        img = Image.open(data_path).convert("RGB")
        width, height = img.size
        # Two views of the image: one shifted to roughly [-0.5, 0.5] for the
        # SD VAE, one ImageNet-normalized for the LOCA backbone.
        input_image = T.Compose([T.ToTensor(), T.Resize((512, 512))])(img)
        input_image_stable = input_image - 0.5
        input_image = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(input_image)
        if box is not None:
            # Rescale boxes from original pixel coords to the 512x512 frame.
            boxes = torch.tensor(box) / torch.tensor([width, height, width, height]) * 512  # xyxy, normalized
            assert self.use_box == True
        else:
            boxes = torch.tensor([[100, 100, 130, 130], [200, 200, 250, 250]], dtype=torch.float32)  # dummy box
            assert self.use_box == False

        # move to device
        input_image = input_image.unsqueeze(0).to(self.device)
        boxes = boxes.unsqueeze(0).to(self.device)
        input_image_stable = input_image_stable.unsqueeze(0).to(self.device)

        # Encode to SD latent space; 0.18215 is the SD latent scaling factor.
        latents = self.stable.vae.encode(input_image_stable).latent_dist.sample().detach()
        latents = latents * 0.18215
        # Sample noise that we'll add to the latents
        noise = torch.randn_like(latents)
        # Single fixed, small timestep: only a light noising is applied.
        timesteps = torch.tensor([20], device=latents.device).long()
        noisy_latents = self.noise_scheduler.add_noise(latents, noise, timesteps)
        input_ids_ = self.stable.tokenizer(
            self.placeholder_token + " repetitive objects",
            # "object",
            padding="max_length",
            truncation=True,
            max_length=self.stable.tokenizer.model_max_length,
            return_tensors="pt",
        )
        input_ids = input_ids_["input_ids"].to(self.device)
        attention_mask = input_ids_["attention_mask"].to(self.device)
        encoder_hidden_states = self.stable.text_encoder(input_ids, attention_mask)[0]

        input_image = input_image.to(self.device)
        boxes = boxes.to(self.device)

        # Locate the placeholder token inside the tokenized prompt.
        task_loc_idx = torch.nonzero(input_ids == self.placeholder_token_id)
        if self.use_box:
            # Inject an exemplar-conditioned embedding produced by the
            # counting adapter right after the task-prompt token.
            loca_out = self.loca_model.forward_before_reg(input_image, boxes)
            loca_feature_bf_regression = loca_out["feature_bf_regression"]
            adapted_emb = self.counting_adapter.adapter(loca_feature_bf_regression, boxes)  # shape [1, 768]
            if task_loc_idx.shape[0] == 0:
                encoder_hidden_states[0, 2, :] = adapted_emb.squeeze()  # place right after the task-prompt token
            else:
                encoder_hidden_states[0, task_loc_idx[0, 1] + 1, :] = adapted_emb.squeeze()  # place right after the task-prompt token

        # Predict the noise residual
        noise_pred, feature_list = self.stable.unet(noisy_latents, timesteps, encoder_hidden_states)
        noise_pred = noise_pred.sample
        attention_store = self.controller.attention_store

        attention_maps = []
        exemplar_attention_maps = []
        exemplar_attention_maps1 = []
        exemplar_attention_maps2 = []
        exemplar_attention_maps3 = []

        cross_self_task_attn_maps = []
        cross_self_exe_attn_maps = []

        # only use 64x64 self-attention
        self_attn_aggregate = attn_utils.aggregate_attention(  # [res, res, 4096]
            prompts=[self.config.prompt],  # TODO: should this prompt be changed?
            attention_store=self.controller,
            res=64,
            from_where=("up", "down"),
            is_cross=False,
            select=0
        )
        # NOTE(review): the 32x32 and 16x16 self-attention aggregates below
        # are computed but never used in this method — confirm intent.
        self_attn_aggregate32 = attn_utils.aggregate_attention(  # [res, res, 4096]
            prompts=[self.config.prompt],  # TODO: should this prompt be changed?
            attention_store=self.controller,
            res=32,
            from_where=("up", "down"),
            is_cross=False,
            select=0
        )
        self_attn_aggregate16 = attn_utils.aggregate_attention(  # [res, res, 4096]
            prompts=[self.config.prompt],  # TODO: should this prompt be changed?
            attention_store=self.controller,
            res=16,
            from_where=("up", "down"),
            is_cross=False,
            select=0
        )

        # cross attention
        for res in [32, 16]:
            attn_aggregate = attn_utils.aggregate_attention(  # [res, res, 77]
                prompts=[self.config.prompt],  # TODO: should this prompt be changed?
                attention_store=self.controller,
                res=res,
                from_where=("up", "down"),
                is_cross=True,
                select=0
            )

            # Token 1 is the task-prompt token; tokens 2..4 follow it.
            task_attn_ = attn_aggregate[:, :, 1].unsqueeze(0).unsqueeze(0)  # [1, 1, res, res]
            attention_maps.append(task_attn_)
            if self.use_box:
                exemplar_attns = attn_aggregate[:, :, 2].unsqueeze(0).unsqueeze(0)  # take the exemplar's attention
                exemplar_attention_maps.append(exemplar_attns)
            else:
                exemplar_attns1 = attn_aggregate[:, :, 2].unsqueeze(0).unsqueeze(0)
                exemplar_attns2 = attn_aggregate[:, :, 3].unsqueeze(0).unsqueeze(0)
                exemplar_attns3 = attn_aggregate[:, :, 4].unsqueeze(0).unsqueeze(0)
                exemplar_attention_maps1.append(exemplar_attns1)
                exemplar_attention_maps2.append(exemplar_attns2)
                exemplar_attention_maps3.append(exemplar_attns3)

        # Upsample each cross-attention map to 64x64 and average resolutions.
        scale_factors = [(64 // attention_maps[i].shape[-1]) for i in range(len(attention_maps))]
        attns = torch.cat([F.interpolate(attention_maps[i_], scale_factor=scale_factors[i_], mode="bilinear") for i_ in range(len(attention_maps))])
        task_attn_64 = torch.mean(attns, dim=0, keepdim=True)
        # Propagate the cross-attention map through self-attention affinities.
        cross_self_task_attn = attn_utils.self_cross_attn(self_attn_aggregate, task_attn_64)
        cross_self_task_attn_maps.append(cross_self_task_attn)

        if self.use_box:
            scale_factors = [(64 // exemplar_attention_maps[i].shape[-1]) for i in range(len(exemplar_attention_maps))]
            attns = torch.cat([F.interpolate(exemplar_attention_maps[i_], scale_factor=scale_factors[i_], mode="bilinear") for i_ in range(len(exemplar_attention_maps))])
            exemplar_attn_64 = torch.mean(attns, dim=0, keepdim=True)

            cross_self_exe_attn = attn_utils.self_cross_attn(self_attn_aggregate, exemplar_attn_64)
            cross_self_exe_attn_maps.append(cross_self_exe_attn)
        else:
            # Zero-shot path: average three exemplar-token attention maps.
            scale_factors = [(64 // exemplar_attention_maps1[i].shape[-1]) for i in range(len(exemplar_attention_maps1))]
            attns = torch.cat([F.interpolate(exemplar_attention_maps1[i_], scale_factor=scale_factors[i_], mode="bilinear") for i_ in range(len(exemplar_attention_maps1))])
            exemplar_attn_64_1 = torch.mean(attns, dim=0, keepdim=True)

            scale_factors = [(64 // exemplar_attention_maps2[i].shape[-1]) for i in range(len(exemplar_attention_maps2))]
            attns = torch.cat([F.interpolate(exemplar_attention_maps2[i_], scale_factor=scale_factors[i_], mode="bilinear") for i_ in range(len(exemplar_attention_maps2))])
            exemplar_attn_64_2 = torch.mean(attns, dim=0, keepdim=True)

            scale_factors = [(64 // exemplar_attention_maps3[i].shape[-1]) for i in range(len(exemplar_attention_maps3))]
            attns = torch.cat([F.interpolate(exemplar_attention_maps3[i_], scale_factor=scale_factors[i_], mode="bilinear") for i_ in range(len(exemplar_attention_maps3))])
            exemplar_attn_64_3 = torch.mean(attns, dim=0, keepdim=True)

            cross_self_task_attn = attn_utils.self_cross_attn(self_attn_aggregate, task_attn_64)
            cross_self_task_attn_maps.append(cross_self_task_attn)

            # if self.args.merge_exemplar == "average":
            cross_self_exe_attn1 = attn_utils.self_cross_attn(self_attn_aggregate, exemplar_attn_64_1)
            cross_self_exe_attn2 = attn_utils.self_cross_attn(self_attn_aggregate, exemplar_attn_64_2)
            cross_self_exe_attn3 = attn_utils.self_cross_attn(self_attn_aggregate, exemplar_attn_64_3)
            exemplar_attn_64 = (exemplar_attn_64_1 + exemplar_attn_64_2 + exemplar_attn_64_3) / 3
            cross_self_exe_attn = (cross_self_exe_attn1 + cross_self_exe_attn2 + cross_self_exe_attn3) / 3

        # Min-max normalize the exemplar map before stacking.
        exemplar_attn_64 = (exemplar_attn_64 - exemplar_attn_64.min()) / (exemplar_attn_64.max() - exemplar_attn_64.min() + 1e-6)

        # 4-channel stack fed into LOCA's regression head.
        attn_stack = [exemplar_attn_64 / 2, cross_self_exe_attn / 2, exemplar_attn_64, cross_self_exe_attn]
        attn_stack = torch.cat(attn_stack, dim=1)

        if not self.use_box:

            # cross_self_exe_attn_np = cross_self_exe_attn.detach().squeeze().cpu().numpy()
            # boxes = gen_dummy_boxes(cross_self_exe_attn_np, max_boxes=1)
            # boxes = boxes.to(self.device)

            # In zero-shot mode the backbone pass happens here (with dummy
            # boxes); in few-shot mode loca_out was computed earlier.
            loca_out = self.loca_model.forward_before_reg(input_image, boxes)
            loca_feature_bf_regression = loca_out["feature_bf_regression"]
        attn_out = self.loca_model.forward_reg(loca_out, attn_stack, feature_list[-1])
        pred_density = attn_out["pred"].squeeze().cpu().numpy()
        pred_cnt = pred_density.sum().item()

        # resize pred_density to original image size
        pred_density_rsz = cv2.resize(pred_density, (width, height), interpolation=cv2.INTER_CUBIC)
        # Renormalize so the resized map still sums to the predicted count.
        pred_density_rsz = pred_density_rsz / pred_density_rsz.sum() * pred_cnt

        return pred_density_rsz, pred_cnt
293
+
294
+
295
def inference(data_path, box=None, save_path="./example_imgs", visualize=False):
    """Run the counting model on a single image.

    Args:
        data_path: path to the input image.
        box: optional exemplar boxes ``[[x1, y1, x2, y2], ...]`` in image
            pixel coordinates; when given the box-guided pipeline is used.
        save_path: directory where the optional visualization is written.
        visualize: when True, save a side-by-side overlay of the predicted
            density map next to the input image.

    Returns:
        The predicted density map, resized to the input image resolution.
    """
    use_box = box is not None
    model = CountingModule(use_box=use_box)
    # strict=True: the checkpoint must match the module's state dict exactly.
    model.load_state_dict(torch.load("pretrained/microscopy_matching_cnt.pth"), strict=True)
    model.eval()
    with torch.no_grad():
        density_map, cnt = model(data_path, box)

    if visualize:
        img = io.imread(data_path)
        # Normalize channel layout: drop alpha, expand grayscale to RGB.
        if len(img.shape) == 3 and img.shape[2] > 3:
            img = img[:, :, :3]
        if len(img.shape) == 2:
            img = np.stack([img] * 3, axis=-1)
        img_show = img.squeeze()
        density_map_show = density_map.squeeze()
        os.makedirs(save_path, exist_ok=True)
        filename = data_path.split("/")[-1]
        # Epsilon guards against division by zero on constant images
        # (consistent with the normalization in inference_count.visualize_result).
        img_show = (img_show - np.min(img_show)) / (np.max(img_show) - np.min(img_show) + 1e-8)
        fig, ax = plt.subplots(1, 2, figsize=(12, 6))
        ax[0].imshow(img_show)
        ax[0].axis('off')
        ax[0].set_title(f"Input image")
        ax[1].imshow(img_show)
        ax[1].imshow(density_map_show, cmap='jet', alpha=0.5)  # Overlay density map with some transparency
        ax[1].axis('off')
        ax[1].set_title(f"Predicted density map, count: {cnt:.1f}")
        plt.tight_layout()
        plt.savefig(os.path.join(save_path, filename.split(".")[0] + "_cnt.png"), dpi=300)
        plt.close()
    return density_map
329
+
330
def main():
    """Demo entry point: count objects in a sample image and save the overlay."""
    inference(
        data_path="example_imgs/1977_Well_F-5_Field_1.png",
        save_path="./example_imgs",
        visualize=True,
    )


if __name__ == "__main__":
    main()
example_imgs/cnt/047cell.png ADDED

Git LFS Details

  • SHA256: 3c9fc3d2ab7beecb16d850b1ef82d70a7f7011051d0199f866bc31c42c296d42
  • Pointer size: 130 Bytes
  • Size of remote file: 72.8 kB
example_imgs/cnt/62_10.png ADDED

Git LFS Details

  • SHA256: b93c916a81eaec1a3511b9379fa293c026bbe74977bc21fc7666a83c92d3b122
  • Pointer size: 130 Bytes
  • Size of remote file: 91.5 kB
example_imgs/cnt/6800-17000_GTEX-XQ3S_Adipose-Subcutaneous.png ADDED

Git LFS Details

  • SHA256: 467319789c5b5b6c370a23c126c33044a841a115cf24b79f75106b5521cd5c44
  • Pointer size: 130 Bytes
  • Size of remote file: 79.9 kB
example_imgs/seg/003_img.png ADDED

Git LFS Details

  • SHA256: 41515cf5d7405135db4656c2cc61b59ab341143bfbee952b44a9542944e8528f
  • Pointer size: 131 Bytes
  • Size of remote file: 302 kB
example_imgs/seg/1-23 [Scan I08].png ADDED

Git LFS Details

  • SHA256: a96dfccdd794a95c9907b0eedecbd53dee078943d9a3dcdb43e11a36d34f5a1f
  • Pointer size: 132 Bytes
  • Size of remote file: 1.42 MB
example_imgs/seg/10X_B2_Tile-15.aligned.png ADDED

Git LFS Details

  • SHA256: e8dce16565ccfb055438b0b65d9e70b5be6cc36c61a964eed53d7ec782b5afa3
  • Pointer size: 132 Bytes
  • Size of remote file: 1.52 MB
example_imgs/seg/1977_Well_F-5_Field_1.png ADDED

Git LFS Details

  • SHA256: 145a99e724048ed40db7843e57a1d93cd2e1f6e221d167a29b732740d6302c52
  • Pointer size: 132 Bytes
  • Size of remote file: 2.43 MB
example_imgs/seg/200972823[5179]_RhoGGG_YAP_TAZ [200972823 Well K6 Field #2].png ADDED

Git LFS Details

  • SHA256: 56bd7a8df07d66ff5f8dac67aa116efe0869f6c46d9ce77e595535a6acd60ae9
  • Pointer size: 132 Bytes
  • Size of remote file: 1.39 MB
example_imgs/seg/A172_Phase_C7_1_00d00h00m_1.png ADDED

Git LFS Details

  • SHA256: f57430b87923f5de9a5799cc84016aeb5d99cd5068481a9fedae2a68fa9bba43
  • Pointer size: 131 Bytes
  • Size of remote file: 159 kB
example_imgs/seg/JE2NileRed_oilp22_PMP_101220_011_NR.png ADDED

Git LFS Details

  • SHA256: bdf31a4eab7826435407f2f88bfeee8f95c2b04d8f579cf6281b7f5838195b03
  • Pointer size: 130 Bytes
  • Size of remote file: 64.4 kB
example_imgs/seg/OpenTest_031.png ADDED

Git LFS Details

  • SHA256: 973ecd4ca18c650d630491c1f3531ba4ff20c12a37728dc79f279b26651d0c82
  • Pointer size: 131 Bytes
  • Size of remote file: 966 kB
example_imgs/seg/X_24.png ADDED

Git LFS Details

  • SHA256: 514b2df4bdcdd1d09d1f032284a5c2aaa0572d2f1ec148b256e4bbf5d68eb3c7
  • Pointer size: 131 Bytes
  • Size of remote file: 102 kB
example_imgs/seg/exp_A01_G002_0001.oir.png ADDED

Git LFS Details

  • SHA256: 9c22531659320908a688da277b7f67b70aafb450e035f56e3962ebfd3423140f
  • Pointer size: 132 Bytes
  • Size of remote file: 1.69 MB
example_imgs/tra/tracking_test_sequence.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda69434e3de8103c98313777640acd35fc7501eec4b1528456304142b18797f
3
+ size 10392163
example_imgs/tra/tracking_test_sequence2.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:120cc2a75a4dd571b8f8ee7ea363a9b82a2b4c516376ccf4f287b6864d2dd576
3
+ size 2288296
inference_count.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # inference_count.py
2
+ # 计数模型推理模块 - 独立版本
3
+
4
+ import torch
5
+ import numpy as np
6
+ from PIL import Image
7
+ import matplotlib.pyplot as plt
8
+ import tempfile
9
+ import os
10
+ from huggingface_hub import hf_hub_download
11
+ from counting import CountingModule
12
+
13
+ MODEL = None
14
+ DEVICE = torch.device("cpu")
15
+
16
def load_model(use_box=False):
    """Instantiate the counting model, fetch its checkpoint from the
    Hugging Face Hub, and place it on the best available device.

    Args:
        use_box: whether the model should consume exemplar bounding boxes.

    Returns:
        (model, device) on success, or (None, cpu device) on any failure.
    """
    global MODEL, DEVICE

    try:
        print("🔄 Loading counting model...")

        MODEL = CountingModule(use_box=use_box)

        ckpt_path = hf_hub_download(
            repo_id="phoebe777777/111",
            filename="microscopy_matching_cnt.pth",
            token=None,
            force_download=False,
        )
        print(f"✅ Checkpoint downloaded: {ckpt_path}")

        state = torch.load(ckpt_path, map_location="cpu")
        MODEL.load_state_dict(state, strict=True)
        MODEL.eval()

        # Prefer CUDA when present; fall back to CPU otherwise.
        on_cuda = torch.cuda.is_available()
        DEVICE = torch.device("cuda" if on_cuda else "cpu")
        MODEL.move_to_device(DEVICE)
        print("✅ Model moved to CUDA" if on_cuda else "✅ Model on CPU")

        print("✅ Counting model loaded successfully")
        return MODEL, DEVICE

    except Exception as e:
        print(f"❌ Error loading counting model: {e}")
        import traceback
        traceback.print_exc()
        return None, torch.device("cpu")
69
+
70
+
71
@torch.no_grad()
def run(model, img_path, box=None, device="cpu", visualize=True):
    """Run counting inference on a single image.

    Args:
        model: the counting model, or None if loading failed.
        img_path: path to the input image.
        box: optional exemplar boxes ``[[x1, y1, x2, y2], ...]`` or None.
        device: device to run on.
        visualize: whether to produce a visualization (currently handled
            by the caller — see visualize_result).

    Returns:
        dict with keys 'density_map' (numpy array or None), 'count' (float),
        'visualized_path' (str or None), plus 'error' on failure.
    """
    # Bail out on a missing model FIRST: the previous version called
    # model.move_to_device(...) and set model.use_box before this guard,
    # so a None model raised AttributeError instead of returning the
    # documented error dict.
    if model is None:
        return {
            'density_map': None,
            'count': 0,
            'visualized_path': None,
            'error': 'Model not loaded'
        }

    print("DEVICE:", device)
    model.move_to_device(device)
    model.eval()
    # Box-guided mode only when exemplar boxes were actually supplied.
    model.use_box = box is not None

    try:
        print(f"🔄 Running counting inference on {img_path}")

        with torch.no_grad():
            density_map, count = model(img_path, box)

        print(f"✅ Counting result: {count:.1f} objects")

        result = {
            'density_map': density_map,
            'count': count,
            'visualized_path': None
        }

        # Visualization is left to the caller:
        # if visualize:
        #     result['visualized_path'] = visualize_result(img_path, density_map, count)

        return result

    except Exception as e:
        print(f"❌ Counting inference error: {e}")
        import traceback
        traceback.print_exc()
        return {
            'density_map': None,
            'count': 0,
            'visualized_path': None,
            'error': str(e)
        }
+ }
139
+
140
+
141
def visualize_result(image_path, density_map, count):
    """Save a jet-colormap overlay of the density map on the input image.

    Args:
        image_path: path to the original image.
        density_map: predicted density map (numpy array).
        count: predicted count (kept for API compatibility; the title that
            used it is currently commented out).

    Returns:
        Path to a temporary PNG containing the overlay, or the original
        image path if anything goes wrong.
    """
    try:
        import skimage.io as io

        img = io.imread(image_path)

        # Normalize channel layout: strip alpha, expand grayscale to RGB.
        if len(img.shape) == 3 and img.shape[2] > 3:
            img = img[:, :, :3]
        if len(img.shape) == 2:
            img = np.stack([img] * 3, axis=-1)

        img_show = img.squeeze()
        density_map_show = density_map.squeeze()

        # Min-max normalize for display; epsilon avoids 0/0 on flat images.
        lo, hi = np.min(img_show), np.max(img_show)
        img_show = (img_show - lo) / (hi - lo + 1e-8)

        fig, ax = plt.subplots(figsize=(8, 6))
        ax.imshow(img_show)
        ax.imshow(density_map_show, cmap='jet', alpha=0.5)
        ax.axis('off')
        # ax.set_title(f"Predicted density map, count: {count:.1f}")

        plt.tight_layout()

        # Write to a named temp file so the caller can serve/display it.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
        plt.savefig(tmp.name, dpi=300)
        plt.close()

        print(f"✅ Visualization saved to {tmp.name}")
        return tmp.name

    except Exception as e:
        print(f"❌ Visualization error: {e}")
        import traceback
        traceback.print_exc()
        return image_path
196
+
197
+
198
# ===== Self-test entry point =====
if __name__ == "__main__":
    banner = "=" * 60
    print(banner)
    print("Testing Counting Model")
    print(banner)

    # Load the model first; everything else depends on it.
    model, device = load_model(use_box=False)

    if model is None:
        print("\n❌ Model loading failed")
    else:
        print("\n" + banner)
        print("Model loaded successfully, testing inference...")
        print(banner)

        test_image = "example_imgs/1977_Well_F-5_Field_1.png"

        if not os.path.exists(test_image):
            print(f"\n⚠️ Test image not found: {test_image}")
        else:
            result = run(
                model,
                test_image,
                box=None,
                device=device,
                visualize=True
            )

            if 'error' in result:
                print(f"\n❌ Inference failed: {result['error']}")
            else:
                print("\n" + banner)
                print("Inference Results:")
                print(banner)
                print(f"Count: {result['count']:.1f}")
                print(f"Density map shape: {result['density_map'].shape}")
                if result['visualized_path']:
                    print(f"Visualization saved to: {result['visualized_path']}")
inference_seg.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ from huggingface_hub import hf_hub_download
4
+ from segmentation import SegmentationModule
5
+
6
+ MODEL = None
7
+ DEVICE = torch.device("cpu")
8
+
9
def load_model(use_box=False):
    """Build the segmentation module, pull its checkpoint from the Hub,
    and place it on the best available device.

    Args:
        use_box: whether the model should consume exemplar bounding boxes.

    Returns:
        (model, device)
    """
    global MODEL, DEVICE
    MODEL = SegmentationModule(use_box=use_box)

    ckpt_path = hf_hub_download(
        repo_id="phoebe777777/111",
        filename="microscopy_matching_seg.pth",
        token=None,
        force_download=False,
    )
    # strict=False: tolerate keys missing from / extra in the checkpoint.
    state = torch.load(ckpt_path, map_location="cpu")
    MODEL.load_state_dict(state, strict=False)
    MODEL.eval()

    on_cuda = torch.cuda.is_available()
    DEVICE = torch.device("cuda" if on_cuda else "cpu")
    MODEL.move_to_device(DEVICE)
    print("✅ Model moved to CUDA" if on_cuda else "✅ Model on CPU")
    return MODEL, DEVICE
30
+
31
+
32
@torch.no_grad()
def run(model, img_path, box=None, device="cpu"):
    """Run segmentation on one image and return the predicted mask.

    `model.use_box` is toggled according to whether exemplar boxes were
    supplied before invoking the model.
    """
    print("DEVICE:", device)
    model.move_to_device(device)
    model.eval()
    with torch.no_grad():
        model.use_box = box is not None
        return model(img_path, box=box)
46
+ # import os
47
+ # import torch
48
+ # import numpy as np
49
+ # from huggingface_hub import hf_hub_download
50
+ # from segmentation import SegmentationModule
51
+
52
+ # MODEL = None
53
+ # DEVICE = torch.device("cpu")
54
+
55
+ # def load_model(use_box=False):
56
+ # global MODEL, DEVICE
57
+
58
+ # # === 优化1: 使用 /data 缓存模型,避免写入 .cache ===
59
+ # cache_dir = "/data/cellseg_model_cache"
60
+ # os.makedirs(cache_dir, exist_ok=True)
61
+
62
+ # ckpt_path = hf_hub_download(
63
+ # repo_id="Shengxiao0709/cellsegmodel",
64
+ # filename="microscopy_matching_seg.pth",
65
+ # token=None,
66
+ # local_dir=cache_dir, # ✅ 下载到 /data
67
+ # local_dir_use_symlinks=False, # ✅ 避免软链接问题
68
+ # force_download=False # ✅ 已存在时不重复下载
69
+ # )
70
+
71
+ # # === 优化2: 加载模型 ===
72
+ # MODEL = SegmentationModule(use_box=use_box)
73
+ # state_dict = torch.load(ckpt_path, map_location="cpu")
74
+ # MODEL.load_state_dict(state_dict, strict=False)
75
+ # MODEL.eval()
76
+
77
+ # DEVICE = torch.device("cpu")
78
+ # print(f"✅ Model loaded from {ckpt_path}")
79
+ # return MODEL, DEVICE
80
+
81
+
82
+ # @torch.no_grad()
83
+ # def run(model, img_path, box=None, device="cpu"):
84
+ # output = model(img_path, box=box)
85
+ # mask = output["pred"]
86
+ # mask = (mask > 0).astype(np.uint8)
87
+ # return mask
inference_track.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # inference_track.py
2
+ # 视频跟踪模型推理模块
3
+
4
+ import torch
5
+ import numpy as np
6
+ import os
7
+ from pathlib import Path
8
+ from tqdm import tqdm
9
+ from huggingface_hub import hf_hub_download
10
+ from tracking_one import TrackingModule
11
+ from models.tra_post_model.trackastra.tracking import graph_to_ctc
12
+
13
+ MODEL = None
14
+ DEVICE = torch.device("cpu")
15
+
16
def load_model(use_box=False):
    """Load the tracking model and its checkpoint from the Hugging Face Hub.

    Args:
        use_box: whether the model should consume exemplar bounding boxes.

    Returns:
        (model, device) on success, or (None, cpu device) on any failure.
    """
    global MODEL, DEVICE

    try:
        print("🔄 Loading tracking model...")

        MODEL = TrackingModule(use_box=use_box)

        ckpt_path = hf_hub_download(
            repo_id="phoebe777777/111",
            filename="microscopy_matching_tra.pth",
            token=None,
            force_download=False,
        )
        print(f"✅ Checkpoint downloaded: {ckpt_path}")

        state = torch.load(ckpt_path, map_location="cpu")
        MODEL.load_state_dict(state, strict=True)
        MODEL.eval()

        # Prefer CUDA when present; fall back to CPU otherwise.
        on_cuda = torch.cuda.is_available()
        DEVICE = torch.device("cuda" if on_cuda else "cpu")
        MODEL.move_to_device(DEVICE)
        print("✅ Model moved to CUDA" if on_cuda else "✅ Model on CPU")

        print("✅ Tracking model loaded successfully")
        return MODEL, DEVICE

    except Exception as e:
        print(f"❌ Error loading tracking model: {e}")
        import traceback
        traceback.print_exc()
        return None, torch.device("cpu")
70
+
71
+
72
@torch.no_grad()
def run(model, video_dir, box=None, device="cpu", output_dir="tracked_results"):
    """Run tracking on a frame-sequence directory and export CTC results.

    Args:
        model: the tracking model, or None if loading failed.
        video_dir: directory containing the consecutive frame images.
        box: optional exemplar boxes.
        device: device to run on (currently unused inside this function).
        output_dir: directory where CTC-format results are written.

    Returns:
        dict with keys 'track_graph', 'masks', 'masks_tracked', and
        'output_dir'; on failure a dict with 'error' and null fields.
    """
    # Guard against a model that failed to load.
    if model is None:
        return {
            'track_graph': None,
            'masks': None,
            'output_dir': None,
            'num_tracks': 0,
            'error': 'Model not loaded'
        }

    try:
        print(f"🔄 Running tracking inference on {video_dir}")

        track_graph, masks = model.track(
            file_dir=video_dir,
            boxes=box,
            mode="greedy",  # alternatives: "greedy_nodiv", "ilp"
            dataname="tracking_result",
        )

        # Ensure the output directory exists (no-op when already present).
        os.makedirs(output_dir, exist_ok=True)

        print("🔄 Converting to CTC format...")
        ctc_tracks, masks_tracked = graph_to_ctc(
            track_graph,
            masks,
            outdir=output_dir,
        )
        print(f"✅ CTC results saved to {output_dir}")

        print(f"✅ Tracking completed")

        return {
            'track_graph': track_graph,
            'masks': masks,
            'masks_tracked': masks_tracked,
            'output_dir': output_dir,
        }

    except Exception as e:
        print(f"❌ Tracking inference error: {e}")
        import traceback
        traceback.print_exc()
        return {
            'track_graph': None,
            'masks': None,
            'output_dir': None,
            'num_tracks': 0,
            'error': str(e)
        }
+ }
150
+
151
+
152
def visualize_tracking_result(masks_tracked, output_path):
    """Render an ID-colored mp4 of tracked instance masks (best effort).

    Args:
        masks_tracked: labeled mask stack of shape (T, H, W); 0 = background.
        output_path: destination video file path.

    Returns:
        output_path on success, None on any failure.
    """
    try:
        import cv2
        import matplotlib.pyplot as plt
        from matplotlib import cm

        n_frames, height, width = masks_tracked.shape

        # One stable color per track id across the whole sequence.
        unique_ids = np.unique(masks_tracked)
        num_colors = len(unique_ids)
        cmap = cm.get_cmap('tab20', num_colors)

        writer = cv2.VideoWriter(
            output_path,
            cv2.VideoWriter_fourcc(*'mp4v'),
            5.0,
            (width, height),
        )

        for t in range(n_frames):
            labels = masks_tracked[t]

            rgb = np.zeros((height, width, 3), dtype=np.uint8)
            for i, obj_id in enumerate(unique_ids):
                if obj_id == 0:
                    continue  # background stays black
                color = np.array(cmap(i % num_colors)[:3]) * 255
                rgb[labels == obj_id] = color

            # OpenCV expects BGR frames.
            writer.write(cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))

        writer.release()
        print(f"✅ Visualization saved to {output_path}")
        return output_path

    except Exception as e:
        print(f"❌ Visualization error: {e}")
        return None
202
+ return None
models/.DS_Store ADDED
Binary file (6.15 kB). View file
 
models/enc_model/__init__.py ADDED
File without changes
models/enc_model/backbone.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ from torch.nn import functional as F
4
+ from torchvision import models
5
+ from torchvision.ops.misc import FrozenBatchNorm2d
6
+
7
+
8
+ class Backbone(nn.Module):
9
+
10
+ def __init__(
11
+ self,
12
+ name: str,
13
+ pretrained: bool,
14
+ dilation: bool,
15
+ reduction: int,
16
+ swav: bool,
17
+ requires_grad: bool
18
+ ):
19
+
20
+ super(Backbone, self).__init__()
21
+
22
+ resnet = getattr(models, name)(
23
+ replace_stride_with_dilation=[False, False, dilation],
24
+ pretrained=pretrained, norm_layer=FrozenBatchNorm2d
25
+ )
26
+
27
+ self.backbone = resnet
28
+ self.reduction = reduction
29
+
30
+ if name == 'resnet50' and swav:
31
+ checkpoint = torch.hub.load_state_dict_from_url(
32
+ 'https://dl.fbaipublicfiles.com/deepcluster/swav_800ep_pretrain.pth.tar',
33
+ map_location="cpu"
34
+ )
35
+ state_dict = {k.replace("module.", ""): v for k, v in checkpoint.items()}
36
+ self.backbone.load_state_dict(state_dict, strict=False)
37
+
38
+ # concatenation of layers 2, 3 and 4
39
+ self.num_channels = 896 if name in ['resnet18', 'resnet34'] else 3584
40
+
41
+ for n, param in self.backbone.named_parameters():
42
+ if 'layer2' not in n and 'layer3' not in n and 'layer4' not in n:
43
+ param.requires_grad_(False)
44
+ else:
45
+ param.requires_grad_(requires_grad)
46
+
47
+ def forward(self, x):
48
+ size = x.size(-2) // self.reduction, x.size(-1) // self.reduction
49
+ x = self.backbone.conv1(x)
50
+ x = self.backbone.bn1(x)
51
+ x = self.backbone.relu(x)
52
+ x = self.backbone.maxpool(x)
53
+
54
+ x = self.backbone.layer1(x)
55
+ x = layer2 = self.backbone.layer2(x)
56
+ x = layer3 = self.backbone.layer3(x)
57
+ x = layer4 = self.backbone.layer4(x)
58
+
59
+ x = torch.cat([
60
+ F.interpolate(f, size=size, mode='bilinear', align_corners=True)
61
+ for f in [layer2, layer3, layer4]
62
+ ], dim=1)
63
+
64
+ return x
models/enc_model/loca.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .backbone import Backbone
2
+ from .transformer import TransformerEncoder
3
+ from .ope import OPEModule
4
+ from .positional_encoding import PositionalEncodingsFixed
5
+ from .regression_head import DensityMapRegressor
6
+
7
+ import torch
8
+ from torch import nn
9
+ from torch.nn import functional as F
10
+
11
+
12
+ class LOCA(nn.Module):
13
+
14
+ def __init__(
15
+ self,
16
+ image_size: int,
17
+ num_encoder_layers: int,
18
+ num_ope_iterative_steps: int,
19
+ num_objects: int,
20
+ emb_dim: int,
21
+ num_heads: int,
22
+ kernel_dim: int,
23
+ backbone_name: str,
24
+ swav_backbone: bool,
25
+ train_backbone: bool,
26
+ reduction: int,
27
+ dropout: float,
28
+ layer_norm_eps: float,
29
+ mlp_factor: int,
30
+ norm_first: bool,
31
+ activation: nn.Module,
32
+ norm: bool,
33
+ zero_shot: bool,
34
+ ):
35
+
36
+ super(LOCA, self).__init__()
37
+
38
+ self.emb_dim = emb_dim
39
+ self.num_objects = num_objects
40
+ self.reduction = reduction
41
+ self.kernel_dim = kernel_dim
42
+ self.image_size = image_size
43
+ self.zero_shot = zero_shot
44
+ self.num_heads = num_heads
45
+ self.num_encoder_layers = num_encoder_layers
46
+
47
+ self.backbone = Backbone(
48
+ backbone_name, pretrained=True, dilation=False, reduction=reduction,
49
+ swav=swav_backbone, requires_grad=train_backbone
50
+ )
51
+ self.input_proj = nn.Conv2d(
52
+ self.backbone.num_channels, emb_dim, kernel_size=1
53
+ )
54
+
55
+ if num_encoder_layers > 0:
56
+ self.encoder = TransformerEncoder(
57
+ num_encoder_layers, emb_dim, num_heads, dropout, layer_norm_eps,
58
+ mlp_factor, norm_first, activation, norm
59
+ )
60
+
61
+ self.ope = OPEModule(
62
+ num_ope_iterative_steps, emb_dim, kernel_dim, num_objects, num_heads,
63
+ reduction, layer_norm_eps, mlp_factor, norm_first, activation, norm, zero_shot
64
+ )
65
+
66
+ self.regression_head = DensityMapRegressor(emb_dim, reduction)
67
+ self.aux_heads = nn.ModuleList([
68
+ DensityMapRegressor(emb_dim, reduction)
69
+ for _ in range(num_ope_iterative_steps - 1)
70
+ ])
71
+
72
+ self.pos_emb = PositionalEncodingsFixed(emb_dim)
73
+
74
+ self.attn_norm = nn.LayerNorm(normalized_shape=(64, 64))
75
+ self.fuse = nn.Sequential(
76
+ nn.Conv2d(324, 256, kernel_size=1, stride=1),
77
+ nn.LeakyReLU(),
78
+ nn.LayerNorm((64, 64))
79
+ )
80
+
81
+ # self.fuse1 = nn.Sequential(
82
+ # nn.Conv2d(322, 256, kernel_size=1, stride=1),
83
+ # nn.LeakyReLU(),
84
+ # nn.LayerNorm((64, 64))
85
+ # )
86
+
87
+ def forward_before_reg(self, x, bboxes):
88
+ num_objects = bboxes.size(1) if not self.zero_shot else self.num_objects
89
+ # backbone
90
+ backbone_features = self.backbone(x)
91
+ # prepare the encoder input
92
+ src = self.input_proj(backbone_features)
93
+ bs, c, h, w = src.size()
94
+ pos_emb = self.pos_emb(bs, h, w, src.device).flatten(2).permute(2, 0, 1)
95
+ src = src.flatten(2).permute(2, 0, 1)
96
+
97
+ # push through the encoder
98
+ if self.num_encoder_layers > 0:
99
+ image_features = self.encoder(src, pos_emb, src_key_padding_mask=None, src_mask=None)
100
+ else:
101
+ image_features = src
102
+
103
+ # prepare OPE input
104
+ f_e = image_features.permute(1, 2, 0).reshape(-1, self.emb_dim, h, w)
105
+
106
+ all_prototypes = self.ope(f_e, pos_emb, bboxes) # [3, 27, 1, 256]
107
+
108
+ outputs = list()
109
+ response_maps_list = []
110
+ for i in range(all_prototypes.size(0)):
111
+ prototypes = all_prototypes[i, ...].permute(1, 0, 2).reshape(
112
+ bs, num_objects, self.kernel_dim, self.kernel_dim, -1
113
+ ).permute(0, 1, 4, 2, 3).flatten(0, 2)[:, None, ...] # [768, 1, 3, 3]
114
+
115
+ response_maps = F.conv2d(
116
+ torch.cat([f_e for _ in range(num_objects)], dim=1).flatten(0, 1).unsqueeze(0),
117
+ prototypes,
118
+ bias=None,
119
+ padding=self.kernel_dim // 2,
120
+ groups=prototypes.size(0)
121
+ ).view(
122
+ bs, num_objects, self.emb_dim, h, w
123
+ ).max(dim=1)[0]
124
+
125
+ # # send through regression heads
126
+ # if i == all_prototypes.size(0) - 1:
127
+ # predicted_dmaps = self.regression_head(response_maps)
128
+ # else:
129
+ # predicted_dmaps = self.aux_heads[i](response_maps)
130
+ # outputs.append(predicted_dmaps)
131
+ response_maps_list.append(response_maps)
132
+
133
+ out = {
134
+ # "pred": outputs[-1],
135
+ "feature_bf_regression": response_maps_list[-1],
136
+ # "aux_pred": outputs[:-1],
137
+ "aux_feature_bf_regression": response_maps_list[:-1]
138
+ }
139
+
140
+ return out
141
+
142
+ def forward_reg(self, response_maps, attn_stack, unet_feature):
143
+ attn_stack = self.attn_norm(attn_stack)
144
+ attn_stack_mean = torch.mean(attn_stack, dim=1, keepdim=True)
145
+ unet_feature = torch.cat([unet_feature, attn_stack], dim=1) # [1, 324, 64, 64]
146
+ unet_feature = unet_feature * attn_stack_mean
147
+ if unet_feature.shape[1] == 322:
148
+ unet_feature = self.fuse1(unet_feature)
149
+ else:
150
+ unet_feature = self.fuse(unet_feature)
151
+
152
+ response_maps = response_maps["aux_feature_bf_regression"] + [response_maps["feature_bf_regression"]]
153
+
154
+ outputs = []
155
+ for i in range(len(response_maps)):
156
+ response_map = response_maps[i] + unet_feature
157
+ if i == len(response_maps) - 1:
158
+ predicted_dmaps = self.regression_head(response_map)
159
+ else:
160
+ predicted_dmaps = self.aux_heads[i](response_map)
161
+ outputs.append(predicted_dmaps)
162
+
163
+ return {"pred": outputs[-1], "aux_pred": outputs[:-1]}
164
+
165
+ def forward_reg1(self, response_maps, self_attn):
166
+ # attn_stack = self.attn_norm(attn_stack)
167
+ # attn_stack_mean = torch.mean(attn_stack, dim=1, keepdim=True)
168
+ # unet_feature = torch.cat([unet_feature, attn_stack], dim=1) # [1, 324, 64, 64]
169
+ # unet_feature = unet_feature * attn_stack_mean
170
+ # if unet_feature.shape[1] == 322:
171
+ # unet_feature = self.fuse1(unet_feature)
172
+ # else:
173
+ # unet_feature = self.fuse(unet_feature)
174
+
175
+
176
+
177
+ response_maps = response_maps["aux_feature_bf_regression"] + [response_maps["feature_bf_regression"]]
178
+
179
+ outputs = []
180
+ for i in range(len(response_maps)):
181
+ response_map = response_maps[i] + self_attn
182
+ if i == len(response_maps) - 1:
183
+ predicted_dmaps = self.regression_head(response_map)
184
+ else:
185
+ predicted_dmaps = self.aux_heads[i](response_map)
186
+ outputs.append(predicted_dmaps)
187
+
188
+ return {"pred": outputs[-1], "aux_pred": outputs[:-1]}
189
+
190
+ def forward_reg_without_unet(self, response_maps, attn_stack):
191
+ # attn_stack = self.attn_norm(attn_stack)
192
+ attn_stack_mean = torch.mean(attn_stack, dim=1, keepdim=True)
193
+
194
+ response_maps = response_maps["aux_feature_bf_regression"] + [response_maps["feature_bf_regression"]]
195
+
196
+ outputs = []
197
+ for i in range(len(response_maps)):
198
+ response_map = response_maps[i] * attn_stack_mean * 0.5 + response_maps[i]
199
+ if i == len(response_maps) - 1:
200
+ predicted_dmaps = self.regression_head(response_map)
201
+ else:
202
+ predicted_dmaps = self.aux_heads[i](response_map)
203
+ outputs.append(predicted_dmaps)
204
+
205
+ return {"pred": outputs[-1], "aux_pred": outputs[:-1]}
206
+
207
+
208
+ def build_model(args):
209
+
210
+ assert args.backbone in ['resnet18', 'resnet50', 'resnet101']
211
+ assert args.reduction in [4, 8, 16]
212
+
213
+ return LOCA(
214
+ image_size=args.image_size,
215
+ num_encoder_layers=args.num_enc_layers,
216
+ num_ope_iterative_steps=args.num_ope_iterative_steps,
217
+ num_objects=args.num_objects,
218
+ zero_shot=args.zero_shot,
219
+ emb_dim=args.emb_dim,
220
+ num_heads=args.num_heads,
221
+ kernel_dim=args.kernel_dim,
222
+ backbone_name=args.backbone,
223
+ swav_backbone=args.swav_backbone,
224
+ train_backbone=args.backbone_lr > 0,
225
+ reduction=args.reduction,
226
+ dropout=args.dropout,
227
+ layer_norm_eps=1e-5,
228
+ mlp_factor=8,
229
+ norm_first=args.pre_norm,
230
+ activation=nn.GELU,
231
+ norm=True,
232
+ )
models/enc_model/loca_args.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+
4
+ def get_argparser():
5
+
6
+ parser = argparse.ArgumentParser("LOCA parser", add_help=False)
7
+
8
+ parser.add_argument('--model_name', default='loca_few_shot', type=str)
9
+ parser.add_argument(
10
+ '--data_path',
11
+ default='./data/FSC147_384_V2',
12
+ type=str
13
+ )
14
+ parser.add_argument(
15
+ '--model_path',
16
+ default='ckpt',
17
+ type=str
18
+ )
19
+ parser.add_argument('--backbone', default='resnet50', type=str)
20
+ parser.add_argument('--swav_backbone', action='store_true', default=True)
21
+ parser.add_argument('--reduction', default=8, type=int)
22
+ parser.add_argument('--image_size', default=512, type=int)
23
+ parser.add_argument('--num_enc_layers', default=3, type=int)
24
+ parser.add_argument('--num_ope_iterative_steps', default=3, type=int)
25
+ parser.add_argument('--emb_dim', default=256, type=int)
26
+ parser.add_argument('--num_heads', default=8, type=int)
27
+ parser.add_argument('--kernel_dim', default=3, type=int)
28
+ parser.add_argument('--num_objects', default=3, type=int)
29
+ parser.add_argument('--epochs', default=200, type=int)
30
+ parser.add_argument('--resume_training', action='store_true')
31
+ parser.add_argument('--lr', default=1e-4, type=float)
32
+ parser.add_argument('--backbone_lr', default=0, type=float)
33
+ parser.add_argument('--lr_drop', default=200, type=int)
34
+ parser.add_argument('--weight_decay', default=1e-4, type=float)
35
+ parser.add_argument('--batch_size', default=1, type=int)
36
+ parser.add_argument('--dropout', default=0.1, type=float)
37
+ parser.add_argument('--num_workers', default=8, type=int)
38
+ parser.add_argument('--max_grad_norm', default=0.1, type=float)
39
+ parser.add_argument('--aux_weight', default=0.3, type=float)
40
+ parser.add_argument('--tiling_p', default=0.5, type=float)
41
+ parser.add_argument('--zero_shot', action='store_true')
42
+ parser.add_argument('--pre_norm', action='store_true', default=True)
43
+
44
+ return parser
models/enc_model/mlp.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch import nn
2
+
3
+
4
+ class MLP(nn.Module):
5
+
6
+ def __init__(
7
+ self,
8
+ input_dim: int,
9
+ hidden_dim: int,
10
+ dropout: float,
11
+ activation: nn.Module
12
+ ):
13
+ super(MLP, self).__init__()
14
+
15
+ self.linear1 = nn.Linear(input_dim, hidden_dim)
16
+ self.linear2 = nn.Linear(hidden_dim, input_dim)
17
+ self.dropout = nn.Dropout(dropout)
18
+ self.activation = activation()
19
+
20
+ def forward(self, x):
21
+ return (
22
+ self.linear2(self.dropout(self.activation(self.linear1(x))))
23
+ )
models/enc_model/ope.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .mlp import MLP
2
+ from .positional_encoding import PositionalEncodingsFixed
3
+
4
+ import torch
5
+ from torch import nn
6
+
7
+ from torchvision.ops import roi_align
8
+
9
+
10
+ class OPEModule(nn.Module):
11
+
12
+ def __init__(
13
+ self,
14
+ num_iterative_steps: int,
15
+ emb_dim: int,
16
+ kernel_dim: int,
17
+ num_objects: int,
18
+ num_heads: int,
19
+ reduction: int,
20
+ layer_norm_eps: float,
21
+ mlp_factor: int,
22
+ norm_first: bool,
23
+ activation: nn.Module,
24
+ norm: bool,
25
+ zero_shot: bool,
26
+ ):
27
+
28
+ super(OPEModule, self).__init__()
29
+
30
+ self.num_iterative_steps = num_iterative_steps
31
+ self.zero_shot = zero_shot
32
+ self.kernel_dim = kernel_dim
33
+ self.num_objects = num_objects
34
+ self.emb_dim = emb_dim
35
+ self.reduction = reduction
36
+
37
+ if num_iterative_steps > 0:
38
+ self.iterative_adaptation = IterativeAdaptationModule(
39
+ num_layers=num_iterative_steps, emb_dim=emb_dim, num_heads=num_heads,
40
+ dropout=0, layer_norm_eps=layer_norm_eps,
41
+ mlp_factor=mlp_factor, norm_first=norm_first,
42
+ activation=activation, norm=norm,
43
+ zero_shot=zero_shot
44
+ )
45
+
46
+ if not self.zero_shot:
47
+ self.shape_or_objectness = nn.Sequential(
48
+ nn.Linear(2, 64),
49
+ nn.ReLU(),
50
+ nn.Linear(64, emb_dim),
51
+ nn.ReLU(),
52
+ nn.Linear(emb_dim, self.kernel_dim**2 * emb_dim)
53
+ )
54
+ else:
55
+ self.shape_or_objectness = nn.Parameter(
56
+ torch.empty((self.num_objects, self.kernel_dim**2, emb_dim))
57
+ )
58
+ nn.init.normal_(self.shape_or_objectness)
59
+
60
+ self.pos_emb = PositionalEncodingsFixed(emb_dim)
61
+
62
+ def forward(self, f_e, pos_emb, bboxes):
63
+ bs, _, h, w = f_e.size()
64
+ # extract the shape features or objectness
65
+ if not self.zero_shot:
66
+ box_hw = torch.zeros(bboxes.size(0), bboxes.size(1), 2).to(bboxes.device)
67
+ box_hw[:, :, 0] = bboxes[:, :, 2] - bboxes[:, :, 0]
68
+ box_hw[:, :, 1] = bboxes[:, :, 3] - bboxes[:, :, 1]
69
+ shape_or_objectness = self.shape_or_objectness(box_hw).reshape(
70
+ bs, -1, self.kernel_dim ** 2, self.emb_dim
71
+ ).flatten(1, 2).transpose(0, 1)
72
+ else:
73
+ shape_or_objectness = self.shape_or_objectness.expand(
74
+ bs, -1, -1, -1
75
+ ).flatten(1, 2).transpose(0, 1)
76
+
77
+ # if not zero shot add appearance
78
+ if not self.zero_shot:
79
+ # reshape bboxes into the format suitable for roi_align
80
+ num_of_boxes = bboxes.size(1)
81
+ bboxes = torch.cat([
82
+ torch.arange(
83
+ bs, requires_grad=False
84
+ ).to(bboxes.device).repeat_interleave(num_of_boxes).reshape(-1, 1),
85
+ bboxes.flatten(0, 1),
86
+ ], dim=1)
87
+ appearance = roi_align(
88
+ f_e,
89
+ boxes=bboxes, output_size=self.kernel_dim,
90
+ spatial_scale=1.0 / self.reduction, aligned=True
91
+ ).permute(0, 2, 3, 1).reshape(
92
+ bs, num_of_boxes * self.kernel_dim ** 2, -1
93
+ ).transpose(0, 1)
94
+ else:
95
+ num_of_boxes = self.num_objects
96
+ appearance = None
97
+
98
+ query_pos_emb = self.pos_emb(
99
+ bs, self.kernel_dim, self.kernel_dim, f_e.device
100
+ ).flatten(2).permute(2, 0, 1).repeat(num_of_boxes, 1, 1)
101
+
102
+ if self.num_iterative_steps > 0:
103
+ memory = f_e.flatten(2).permute(2, 0, 1)
104
+ all_prototypes = self.iterative_adaptation(
105
+ shape_or_objectness, appearance, memory, pos_emb, query_pos_emb
106
+ )
107
+ else:
108
+ if shape_or_objectness is not None and appearance is not None:
109
+ all_prototypes = (shape_or_objectness + appearance).unsqueeze(0)
110
+ else:
111
+ all_prototypes = (
112
+ shape_or_objectness if shape_or_objectness is not None else appearance
113
+ ).unsqueeze(0)
114
+
115
+ return all_prototypes
116
+
117
+
118
+
119
+ class IterativeAdaptationModule(nn.Module):
120
+
121
+ def __init__(
122
+ self,
123
+ num_layers: int,
124
+ emb_dim: int,
125
+ num_heads: int,
126
+ dropout: float,
127
+ layer_norm_eps: float,
128
+ mlp_factor: int,
129
+ norm_first: bool,
130
+ activation: nn.Module,
131
+ norm: bool,
132
+ zero_shot: bool
133
+ ):
134
+
135
+ super(IterativeAdaptationModule, self).__init__()
136
+
137
+ self.layers = nn.ModuleList([
138
+ IterativeAdaptationLayer(
139
+ emb_dim, num_heads, dropout, layer_norm_eps,
140
+ mlp_factor, norm_first, activation, zero_shot
141
+ ) for i in range(num_layers)
142
+ ])
143
+
144
+ self.norm = nn.LayerNorm(emb_dim, layer_norm_eps) if norm else nn.Identity()
145
+
146
+ def forward(
147
+ self, tgt, appearance, memory, pos_emb, query_pos_emb, tgt_mask=None, memory_mask=None,
148
+ tgt_key_padding_mask=None, memory_key_padding_mask=None
149
+ ):
150
+
151
+ output = tgt
152
+ outputs = list()
153
+ for i, layer in enumerate(self.layers):
154
+ output = layer(
155
+ output, appearance, memory, pos_emb, query_pos_emb, tgt_mask, memory_mask,
156
+ tgt_key_padding_mask, memory_key_padding_mask
157
+ )
158
+ outputs.append(self.norm(output))
159
+
160
+ return torch.stack(outputs)
161
+
162
+
163
+ class IterativeAdaptationLayer(nn.Module):
164
+
165
+ def __init__(
166
+ self,
167
+ emb_dim: int,
168
+ num_heads: int,
169
+ dropout: float,
170
+ layer_norm_eps: float,
171
+ mlp_factor: int,
172
+ norm_first: bool,
173
+ activation: nn.Module,
174
+ zero_shot: bool
175
+ ):
176
+ super(IterativeAdaptationLayer, self).__init__()
177
+
178
+ self.norm_first = norm_first
179
+ self.zero_shot = zero_shot
180
+
181
+ if not self.zero_shot:
182
+ self.norm1 = nn.LayerNorm(emb_dim, layer_norm_eps)
183
+ self.norm2 = nn.LayerNorm(emb_dim, layer_norm_eps)
184
+ self.norm3 = nn.LayerNorm(emb_dim, layer_norm_eps)
185
+ if not self.zero_shot:
186
+ self.dropout1 = nn.Dropout(dropout)
187
+ self.dropout2 = nn.Dropout(dropout)
188
+ self.dropout3 = nn.Dropout(dropout)
189
+
190
+ if not self.zero_shot:
191
+ self.self_attn = nn.MultiheadAttention(emb_dim, num_heads, dropout)
192
+ self.enc_dec_attn = nn.MultiheadAttention(emb_dim, num_heads, dropout)
193
+
194
+ self.mlp = MLP(emb_dim, mlp_factor * emb_dim, dropout, activation)
195
+
196
+ def with_emb(self, x, emb):
197
+ return x if emb is None else x + emb
198
+
199
+ def forward(
200
+ self, tgt, appearance, memory, pos_emb, query_pos_emb, tgt_mask, memory_mask,
201
+ tgt_key_padding_mask, memory_key_padding_mask
202
+ ):
203
+ if self.norm_first:
204
+ if not self.zero_shot:
205
+ tgt_norm = self.norm1(tgt)
206
+ tgt = tgt + self.dropout1(self.self_attn(
207
+ query=self.with_emb(tgt_norm, query_pos_emb),
208
+ key=self.with_emb(appearance, query_pos_emb),
209
+ value=appearance,
210
+ attn_mask=tgt_mask,
211
+ key_padding_mask=tgt_key_padding_mask
212
+ )[0])
213
+
214
+ tgt_norm = self.norm2(tgt)
215
+ tgt = tgt + self.dropout2(self.enc_dec_attn(
216
+ query=self.with_emb(tgt_norm, query_pos_emb),
217
+ key=memory+pos_emb,
218
+ value=memory,
219
+ attn_mask=memory_mask,
220
+ key_padding_mask=memory_key_padding_mask
221
+ )[0])
222
+ tgt_norm = self.norm3(tgt)
223
+ tgt = tgt + self.dropout3(self.mlp(tgt_norm))
224
+
225
+ else:
226
+ if not self.zero_shot:
227
+ tgt = self.norm1(tgt + self.dropout1(self.self_attn(
228
+ query=self.with_emb(tgt, query_pos_emb),
229
+ key=self.with_emb(appearance),
230
+ value=appearance,
231
+ attn_mask=tgt_mask,
232
+ key_padding_mask=tgt_key_padding_mask
233
+ )[0]))
234
+
235
+ tgt = self.norm2(tgt + self.dropout2(self.enc_dec_attn(
236
+ query=self.with_emb(tgt, query_pos_emb),
237
+ key=memory+pos_emb,
238
+ value=memory,
239
+ attn_mask=memory_mask,
240
+ key_padding_mask=memory_key_padding_mask
241
+ )[0]))
242
+
243
+ tgt = self.norm3(tgt + self.dropout3(self.mlp(tgt)))
244
+
245
+ return tgt
models/enc_model/positional_encoding.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+
4
+
5
+ class PositionalEncodingsFixed(nn.Module):
6
+
7
+ def __init__(self, emb_dim, temperature=10000):
8
+
9
+ super(PositionalEncodingsFixed, self).__init__()
10
+
11
+ self.emb_dim = emb_dim
12
+ self.temperature = temperature
13
+
14
+ def _1d_pos_enc(self, mask, dim):
15
+ temp = torch.arange(self.emb_dim // 2).float().to(mask.device)
16
+ temp = self.temperature ** (2 * (temp.div(2, rounding_mode='floor')) / self.emb_dim)
17
+
18
+ enc = (~mask).cumsum(dim).float().unsqueeze(-1) / temp
19
+ enc = torch.stack([
20
+ enc[..., 0::2].sin(), enc[..., 1::2].cos()
21
+ ], dim=-1).flatten(-2)
22
+
23
+ return enc
24
+
25
+ def forward(self, bs, h, w, device):
26
+ mask = torch.zeros(bs, h, w, dtype=torch.bool, requires_grad=False, device=device)
27
+ x = self._1d_pos_enc(mask, dim=2)
28
+ y = self._1d_pos_enc(mask, dim=1)
29
+
30
+ return torch.cat([y, x], dim=3).permute(0, 3, 1, 2)
models/enc_model/regression_head.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch import nn
2
+ import torch
3
+
4
+
5
+ class UpsamplingLayer(nn.Module):
6
+
7
+ def __init__(self, in_channels, out_channels, leaky=True):
8
+
9
+ super(UpsamplingLayer, self).__init__()
10
+
11
+ self.layer = nn.Sequential(
12
+ nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
13
+ nn.LeakyReLU() if leaky else nn.ReLU(),
14
+ nn.UpsamplingBilinear2d(scale_factor=2)
15
+ )
16
+
17
+ def forward(self, x):
18
+ return self.layer(x)
19
+
20
+
21
+ class DensityMapRegressor(nn.Module):
22
+
23
+ def __init__(self, in_channels, reduction):
24
+
25
+ super(DensityMapRegressor, self).__init__()
26
+
27
+ if reduction == 8:
28
+ self.regressor = nn.Sequential(
29
+ UpsamplingLayer(in_channels, 128),
30
+ UpsamplingLayer(128, 64),
31
+ UpsamplingLayer(64, 32),
32
+ nn.Conv2d(32, 1, kernel_size=1),
33
+ nn.LeakyReLU()
34
+ )
35
+ elif reduction == 16:
36
+ self.regressor = nn.Sequential(
37
+ UpsamplingLayer(in_channels, 128),
38
+ UpsamplingLayer(128, 64),
39
+ UpsamplingLayer(64, 32),
40
+ UpsamplingLayer(32, 16),
41
+ nn.Conv2d(16, 1, kernel_size=1),
42
+ nn.LeakyReLU()
43
+ )
44
+
45
+ self.reset_parameters()
46
+
47
+ def forward(self, x):
48
+ return self.regressor(x)
49
+
50
+ def reset_parameters(self):
51
+ for module in self.modules():
52
+ if isinstance(module, nn.Conv2d):
53
+ nn.init.normal_(module.weight, std=0.01)
54
+ if module.bias is not None:
55
+ nn.init.constant_(module.bias, 0)
56
+
57
+
58
+ class DensityMapRegressor_(nn.Module):
59
+
60
+ def __init__(self, in_channels, reduction):
61
+
62
+ super(DensityMapRegressor, self).__init__()
63
+
64
+ if reduction == 8:
65
+ self.regressor = nn.Sequential(
66
+ UpsamplingLayer(in_channels, 128),
67
+ UpsamplingLayer(128, 64),
68
+ UpsamplingLayer(64, 32),
69
+ nn.Conv2d(32, 1, kernel_size=1),
70
+ nn.LeakyReLU()
71
+ )
72
+ elif reduction == 16:
73
+ self.regressor = nn.Sequential(
74
+ UpsamplingLayer(in_channels, 128),
75
+ UpsamplingLayer(128, 64),
76
+ UpsamplingLayer(64, 32),
77
+ UpsamplingLayer(32, 16),
78
+ nn.Conv2d(16, 1, kernel_size=1),
79
+ nn.LeakyReLU()
80
+ )
81
+
82
+ self.reset_parameters()
83
+
84
+ def forward(self, x):
85
+ return self.regressor(x)
86
+
87
+ def reset_parameters(self):
88
+ for module in self.modules():
89
+ if isinstance(module, nn.Conv2d):
90
+ nn.init.normal_(module.weight, std=0.01)
91
+ if module.bias is not None:
92
+ nn.init.constant_(module.bias, 0)
models/enc_model/transformer.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .mlp import MLP
2
+
3
+ from torch import nn
4
+
5
+
6
+ class TransformerEncoder(nn.Module):
7
+
8
+ def __init__(
9
+ self,
10
+ num_layers: int,
11
+ emb_dim: int,
12
+ num_heads: int,
13
+ dropout: float,
14
+ layer_norm_eps: float,
15
+ mlp_factor: int,
16
+ norm_first: bool,
17
+ activation: nn.Module,
18
+ norm: bool,
19
+ ):
20
+
21
+ super(TransformerEncoder, self).__init__()
22
+
23
+ self.layers = nn.ModuleList([
24
+ TransformerEncoderLayer(
25
+ emb_dim, num_heads, dropout, layer_norm_eps,
26
+ mlp_factor, norm_first, activation
27
+ ) for _ in range(num_layers)
28
+ ])
29
+
30
+ self.norm = nn.LayerNorm(emb_dim, layer_norm_eps) if norm else nn.Identity()
31
+
32
+ def forward(self, src, pos_emb, src_mask, src_key_padding_mask):
33
+ output = src
34
+ for layer in self.layers:
35
+ output = layer(output, pos_emb, src_mask, src_key_padding_mask)
36
+ return self.norm(output)
37
+
38
+
39
+ class TransformerEncoderLayer(nn.Module):
40
+
41
+ def __init__(
42
+ self,
43
+ emb_dim: int,
44
+ num_heads: int,
45
+ dropout: float,
46
+ layer_norm_eps: float,
47
+ mlp_factor: int,
48
+ norm_first: bool,
49
+ activation: nn.Module,
50
+ ):
51
+ super(TransformerEncoderLayer, self).__init__()
52
+
53
+ self.norm_first = norm_first
54
+
55
+ self.norm1 = nn.LayerNorm(emb_dim, layer_norm_eps)
56
+ self.norm2 = nn.LayerNorm(emb_dim, layer_norm_eps)
57
+ self.dropout1 = nn.Dropout(dropout)
58
+ self.dropout2 = nn.Dropout(dropout)
59
+
60
+ self.self_attn = nn.MultiheadAttention(
61
+ emb_dim, num_heads, dropout
62
+ )
63
+ self.mlp = MLP(emb_dim, mlp_factor * emb_dim, dropout, activation)
64
+
65
+ def with_emb(self, x, emb):
66
+ return x if emb is None else x + emb
67
+
68
+ def forward(self, src, pos_emb, src_mask, src_key_padding_mask):
69
+ if self.norm_first:
70
+ src_norm = self.norm1(src)
71
+ q = k = src_norm + pos_emb
72
+ src = src + self.dropout1(self.self_attn(
73
+ query=q,
74
+ key=k,
75
+ value=src_norm,
76
+ attn_mask=src_mask,
77
+ key_padding_mask=src_key_padding_mask
78
+ )[0])
79
+
80
+ src_norm = self.norm2(src)
81
+ src = src + self.dropout2(self.mlp(src_norm))
82
+ else:
83
+ q = k = src + pos_emb
84
+ src = self.norm1(src + self.dropout1(self.self_attn(
85
+ query=q,
86
+ key=k,
87
+ value=src,
88
+ attn_mask=src_mask,
89
+ key_padding_mask=src_key_padding_mask
90
+ )[0]))
91
+
92
+ src = self.norm2(src + self.dropout2(self.mlp(src)))
93
+
94
+ return src
models/enc_model/unet_parts.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Parts of the U-Net model """
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ class DoubleConv(nn.Module):
9
+ """(convolution => [BN] => ReLU) * 2"""
10
+
11
+ def __init__(self, in_channels, out_channels, mid_channels=None):
12
+ super().__init__()
13
+ if not mid_channels:
14
+ mid_channels = out_channels
15
+ self.double_conv = nn.Sequential(
16
+ nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
17
+ nn.BatchNorm2d(mid_channels),
18
+ nn.ReLU(inplace=True),
19
+ nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
20
+ nn.BatchNorm2d(out_channels),
21
+ nn.ReLU(inplace=True)
22
+ )
23
+
24
+ def forward(self, x):
25
+ return self.double_conv(x)
26
+
27
+
28
+ class Down(nn.Module):
29
+ """Downscaling with maxpool then double conv"""
30
+
31
+ def __init__(self, in_channels, out_channels):
32
+ super().__init__()
33
+ self.maxpool_conv = nn.Sequential(
34
+ nn.MaxPool2d(2),
35
+ DoubleConv(in_channels, out_channels)
36
+ )
37
+
38
+ def forward(self, x):
39
+ return self.maxpool_conv(x)
40
+
41
+
42
+ class Up(nn.Module):
43
+ """Upscaling then double conv"""
44
+
45
+ def __init__(self, in_channels, out_channels, bilinear=True):
46
+ super().__init__()
47
+
48
+ # if bilinear, use the normal convolutions to reduce the number of channels
49
+ if bilinear:
50
+ self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
51
+ self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
52
+ else:
53
+ self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
54
+ self.conv = DoubleConv(in_channels, out_channels)
55
+
56
+ def forward(self, x1, x2):
57
+ x1 = self.up(x1)
58
+ # input is CHW
59
+ diffY = x2.size()[2] - x1.size()[2]
60
+ diffX = x2.size()[3] - x1.size()[3]
61
+
62
+ x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
63
+ diffY // 2, diffY - diffY // 2])
64
+ # if you have padding issues, see
65
+ # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
66
+ # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
67
+ x = torch.cat([x2, x1], dim=1)
68
+ return self.conv(x)
69
+
70
+
71
+ class OutConv(nn.Module):
72
+ def __init__(self, in_channels, out_channels):
73
+ super(OutConv, self).__init__()
74
+ self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
75
+
76
+ def forward(self, x):
77
+ return self.conv(x)
models/model.py ADDED
@@ -0,0 +1,653 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import os
5
+ import clip
6
+ import sys
7
+ import numpy as np
8
+ from models.seg_post_model.cellpose.models import CellposeModel
9
+
10
+ from torchvision.ops import roi_align
11
def crop_roi_feat(feat, boxes):
    """Crop box-aligned patches out of a feature map.

    feat: 1 x c x h x w feature map.
    boxes: m x 4 tensor of [y_tl, x_tl, y_br, x_br] coordinates given in
        the (assumed 512-px -- TODO confirm) input-image frame.

    Returns a list of m tensors, each 1 x c x h_box x w_box.
    """
    _, _, h, w = feat.shape
    # Feature stride relative to the assumed 512-px input resolution.
    stride = 512 / h
    scaled = boxes / stride  # fresh tensor; caller's boxes stay untouched
    # Snap outward so the integer crop fully covers the fractional box.
    scaled[:, :2] = torch.clamp_min(torch.floor(scaled[:, :2]), 0)
    scaled[:, 2] = torch.clamp_max(torch.ceil(scaled[:, 2]), h)
    scaled[:, 3] = torch.clamp_max(torch.ceil(scaled[:, 3]), w)
    crops = []
    for box in scaled:
        y0, x0, y1, x1 = (int(v) for v in box)
        # +1 makes the bottom-right edge inclusive.
        crops.append(feat[:, :, y0:y1 + 1, x0:x1 + 1])
    return crops
31
+
32
class Counting_with_SD_features(nn.Module):
    """Counting head over Stable-Diffusion features: ROI adapter only.

    The density regressor is intentionally not instantiated here; this
    variant exposes just the exemplar-box adapter.
    """

    def __init__(self, scale_factor):
        # `scale_factor` is accepted for constructor parity but unused.
        super(Counting_with_SD_features, self).__init__()
        self.adapter = adapter_roi()
37
+
38
class Counting_with_SD_features_loca(nn.Module):
    """Counting model: LOCA-style ROI adapter plus a density regressor."""

    def __init__(self, scale_factor):
        # `scale_factor` is kept for signature compatibility; not used.
        super(Counting_with_SD_features_loca, self).__init__()
        self.adapter = adapter_roi_loca()
        self.regressor = regressor_with_SD_features()
43
+
44
+
45
class Counting_with_SD_features_dino_vit_c3(nn.Module):
    """Segmentation variant: LOCA adapter + Cellpose-ViT-backed head."""

    def __init__(self, scale_factor, vit=None):
        # Both constructor arguments are unused; kept for a uniform ctor
        # signature across the model variants in this module.
        super(Counting_with_SD_features_dino_vit_c3, self).__init__()
        self.adapter = adapter_roi_loca()
        self.regressor = regressor_with_SD_features_seg_vit_c3()
50
+
51
class Counting_with_SD_features_track(nn.Module):
    """Tracking variant: LOCA adapter + tracking/association head."""

    def __init__(self, scale_factor, vit=None):
        # `scale_factor` and `vit` are unused; retained for interface parity.
        super(Counting_with_SD_features_track, self).__init__()
        self.adapter = adapter_roi_loca()
        self.regressor = regressor_with_SD_features_tra()
56
+
57
+
58
class adapter_roi(nn.Module):
    """Adapter turning exemplar-box ROI features into a 768-d embedding.

    ROI-aligns a 3x3 patch per exemplar box from a 256-channel feature
    map, averages over all boxes, and projects to 768 dims through a
    bottleneck MLP (fc1/fc2).
    """

    def __init__(self, pool_size=[3, 3]):
        super(adapter_roi, self).__init__()
        # NOTE(review): `pool_size` and `pool` are stored but never used in
        # forward(); the effective ROI size is the hard-coded output_size=3.
        self.pool_size = pool_size
        self.conv1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        # self.relu = nn.ReLU()
        # self.conv2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.fc = nn.Linear(256 * 3 * 3, 768)
        # **new
        # Bottleneck projection 768 -> 192 -> 768 (squeeze/excite style).
        self.fc1 = nn.Sequential(
            nn.ReLU(),
            nn.Linear(768, 768 // 4, bias=False),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(768 // 4, 768, bias=False),
            # nn.ReLU()
        )
        self.initialize_weights()

    def forward(self, x, boxes):
        """x: bs x 256 x h x w feature map; boxes: bs x m x 4 boxes.

        Returns a 1 x 768 embedding averaged over all boxes.
        """
        num_of_boxes = boxes.shape[1]
        rois = []
        bs, _, h, w = x.shape
        # Prepend the per-row batch-index column required by torchvision's
        # roi_align when boxes are passed as a single tensor.
        boxes = torch.cat([
            torch.arange(
                bs, requires_grad=False
            ).to(boxes.device).repeat_interleave(num_of_boxes).reshape(-1, 1),
            boxes.flatten(0, 1),
        ], dim=1)
        rois = roi_align(
            x,
            boxes=boxes, output_size=3,
            spatial_scale=1.0 / 8, aligned=True
        )
        # Average the pooled patches over all boxes -> one exemplar token.
        rois = torch.mean(rois, dim=0, keepdim=True)
        x = self.conv1(rois)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        x = self.fc1(x)
        x = self.fc2(x)
        return x


    def initialize_weights(self):
        # Xavier init for conv/linear weights; biases zeroed.
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
110
+
111
+
112
class adapter_roi_loca(nn.Module):
    """LOCA-style exemplar adapter: ROI features -> 768-d embedding.

    Unlike `adapter_roi`, this version first resizes the feature map to
    512x512, supports batched inputs, and has no bottleneck MLP after the
    linear projection.
    """

    def __init__(self, pool_size=[3, 3]):
        super(adapter_roi_loca, self).__init__()
        # NOTE(review): `pool_size` and `pool` are stored but unused; the
        # effective ROI size is the hard-coded output_size=3 in forward().
        self.pool_size = pool_size
        self.conv1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.fc = nn.Linear(256 * 3 * 3, 768)
        self.initialize_weights()
    def forward(self, x, boxes):
        """x: bs x 256 x h x w; boxes: bs x m x 4 exemplar boxes.

        Returns one 768-d embedding per image (boxes averaged per image).
        """
        num_of_boxes = boxes.shape[1]
        rois = []
        bs, _, h, w = x.shape
        # spatial_scale=1/8 below assumes a 512-px frame of reference, so
        # resize the feature map if it arrives at a different resolution.
        if h != 512 or w != 512:
            x = F.interpolate(x, size=(512, 512), mode='bilinear', align_corners=False)
        if bs == 1:
            # Single image: pass boxes as one tensor with a leading
            # batch-index column, as torchvision's roi_align expects.
            boxes = torch.cat([
                torch.arange(
                    bs, requires_grad=False
                ).to(boxes.device).repeat_interleave(num_of_boxes).reshape(-1, 1),
                boxes.flatten(0, 1),
            ], dim=1)
            rois = roi_align(
                x,
                boxes=boxes, output_size=3,
                spatial_scale=1.0 / 8, aligned=True
            )
            rois = torch.mean(rois, dim=0, keepdim=True)
        else:
            # Batched: pass a per-image list of (m x 4) box tensors instead.
            boxes = torch.cat([
                boxes.flatten(0, 1),
            ], dim=1).split(num_of_boxes, dim=0)
            rois = roi_align(
                x,
                boxes=boxes, output_size=3,
                spatial_scale=1.0 / 8, aligned=True
            )
            rois = rois.split(num_of_boxes, dim=0)
            rois = torch.stack(rois, dim=0)
            # Average the m boxes belonging to each image.
            rois = torch.mean(rois, dim=1, keepdim=False)
        x = self.conv1(rois)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def forward_boxes(self, x, boxes):
        """Like forward() but keeps one embedding per box (no averaging).

        Only bs == 1 is supported.
        """
        num_of_boxes = boxes.shape[1]
        rois = []
        bs, _, h, w = x.shape
        if h != 512 or w != 512:
            x = F.interpolate(x, size=(512, 512), mode='bilinear', align_corners=False)
        if bs == 1:
            boxes = torch.cat([
                torch.arange(
                    bs, requires_grad=False
                ).to(boxes.device).repeat_interleave(num_of_boxes).reshape(-1, 1),
                boxes.flatten(0, 1),
            ], dim=1)
            rois = roi_align(
                x,
                boxes=boxes, output_size=3,
                spatial_scale=1.0 / 8, aligned=True
            )
            # rois = torch.mean(rois, dim=0, keepdim=True)
        else:
            raise NotImplementedError
        x = self.conv1(rois)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def initialize_weights(self):
        # Xavier init for conv/linear weights; biases zeroed.
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
188
+
189
+
190
+
191
+
192
class regressor1(nn.Module):
    """Small upsampling head: 4-ch map -> 1-ch non-negative map at 4x size."""

    def __init__(self):
        super(regressor1, self).__init__()
        self.conv1 = nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(4, 1, kernel_size=3, stride=1, padding=1)
        self.upsampler = nn.UpsamplingBilinear2d(scale_factor=2)
        self.leaky_relu = nn.LeakyReLU()
        self.relu = nn.ReLU()
        self.initialize_weights()

    def forward(self, x):
        # conv -> LeakyReLU -> 2x bilinear upsample, twice, then a final
        # projection clamped non-negative with ReLU.
        h = self.leaky_relu(self.conv1(x))
        h = self.upsampler(h)
        h = self.leaky_relu(self.conv2(h))
        h = self.upsampler(h)
        return self.relu(self.conv3(h))

    def initialize_weights(self):
        """Xavier-init all conv/linear weights; zero all biases."""
        for mod in self.modules():
            if isinstance(mod, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_normal_(mod.weight)
                if mod.bias is not None:
                    nn.init.constant_(mod.bias, 0)
223
+
224
+
225
class regressor1(nn.Module):
    """Upsampling density head: 4-ch input -> 1-ch non-negative map at 4x.

    NOTE(review): this redefinition shadows an otherwise-identical
    `regressor1` declared earlier in this module; one of the two copies
    should eventually be removed.
    """

    def __init__(self):
        super(regressor1, self).__init__()
        self.conv1 = nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(4, 1, kernel_size=3, stride=1, padding=1)
        self.upsampler = nn.UpsamplingBilinear2d(scale_factor=2)
        self.leaky_relu = nn.LeakyReLU()
        self.relu = nn.ReLU()
        # Bug fix: initialize_weights() was defined but never invoked in
        # this copy -- unlike the earlier regressor1 and every sibling
        # module -- so it silently kept PyTorch's default initialization.
        self.initialize_weights()

    def forward(self, x):
        """Decode x (N, 4, H, W) to a non-negative (N, 1, 4H, 4W) map."""
        x_ = self.conv1(x)
        x_ = self.leaky_relu(x_)
        x_ = self.upsampler(x_)
        x_ = self.conv2(x_)
        x_ = self.leaky_relu(x_)
        x_ = self.upsampler(x_)
        x_ = self.conv3(x_)
        x_ = self.relu(x_)
        out = x_
        return out

    def initialize_weights(self):
        """Xavier-init conv/linear weights; zero biases."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
252
+
253
+
254
class regressor_with_SD_features(nn.Module):
    """Density regressor: SD attention maps + UNet features -> density map.

    Fuses a [1, C, 64, 64] attention stack with the last UNet feature map
    (324 channels total after concatenation) and decodes to a
    single-channel non-negative density map at 8x resolution (512 x 512).
    """

    def __init__(self):
        super(regressor_with_SD_features, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(324, 256, kernel_size=1, stride=1),
            nn.LeakyReLU(),
            nn.LayerNorm((64, 64))
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.ConvTranspose2d(in_channels=128, out_channels=128, kernel_size=4, stride=2, padding=1),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=64, out_channels=64, kernel_size=4, stride=2, padding=1),
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.ConvTranspose2d(in_channels=32, out_channels=32, kernel_size=4, stride=2, padding=1),
        )
        self.conv = nn.Sequential(
            nn.Conv2d(32, 1, kernel_size=1),
            nn.ReLU()
        )
        self.norm = nn.LayerNorm(normalized_shape=(64, 64))
        self.initialize_weights()

    def forward(self, attn_stack, feature_list):
        """attn_stack: [1, C, 64, 64]; feature_list[-1]: [1, 324-C, 64, 64].

        Returns a [1, 1, 512, 512] density map, divided by 100 to keep
        early-training outputs in a small range.
        """
        attn_stack = self.norm(attn_stack)
        unet_feature = feature_list[-1]
        # Gate the UNet features with the mean attention response.
        attn_stack_mean = torch.mean(attn_stack, dim=1, keepdim=True)
        unet_feature = unet_feature * attn_stack_mean
        unet_feature = torch.cat([unet_feature, attn_stack], dim=1)  # [1, 324, 64, 64]
        x = self.layer1(unet_feature)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        out = self.conv(x)
        return out / 100

    def initialize_weights(self):
        # Xavier init with zero bias. Bug fix: ConvTranspose2d layers were
        # previously skipped (ConvTranspose2d is not an nn.Conv2d subclass)
        # and kept PyTorch's default init, unlike regressor_with_deconv in
        # this module which explicitly initializes them. Included here for
        # consistency.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
303
+
304
class regressor_with_SD_features_seg(nn.Module):
    """Segmentation decoder over fused SD attention and UNet features.

    Fuses a [1, C, 64, 64] attention stack with the last UNet feature map
    (324 channels after concatenation) and decodes to [1, 2, 512, 512]
    logits with no final activation.
    """

    def __init__(self):
        super(regressor_with_SD_features_seg, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(324, 256, kernel_size=1, stride=1),
            nn.LeakyReLU(),
            nn.LayerNorm((64, 64))
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.ConvTranspose2d(in_channels=128, out_channels=128, kernel_size=4, stride=2, padding=1),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=64, out_channels=64, kernel_size=4, stride=2, padding=1),
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.ConvTranspose2d(in_channels=32, out_channels=32, kernel_size=4, stride=2, padding=1),
        )
        self.conv = nn.Sequential(
            nn.Conv2d(32, 2, kernel_size=1),
        )
        self.norm = nn.LayerNorm(normalized_shape=(64, 64))
        self.initialize_weights()

    def forward(self, attn_stack, feature_list):
        """Fuse attention with UNet features and decode to 2-ch logits."""
        normed_attn = self.norm(attn_stack)
        # Gate the UNet features with the mean attention response.
        fused = feature_list[-1] * normed_attn.mean(dim=1, keepdim=True)
        fused = torch.cat([fused, normed_attn], dim=1)  # [1, 324, 64, 64]
        h = self.layer1(fused)
        h = self.layer2(h)
        h = self.layer3(h)
        h = self.layer4(h)
        return self.conv(h)

    def initialize_weights(self):
        """Xavier-init conv/linear weights; zero their biases."""
        for mod in self.modules():
            if isinstance(mod, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_normal_(mod.weight)
                if mod.bias is not None:
                    nn.init.constant_(mod.bias, 0)
353
+
354
+
355
+ from models.enc_model.unet_parts import *
356
+
357
+
358
class regressor_with_SD_features_seg_vit_c3(nn.Module):
    """Segmentation head feeding SD guidance maps into a Cellpose model.

    Builds a 3-channel guidance map from two attention channels plus the
    mean UNet feature, then runs Cellpose on the raw image with that map
    passed as an extra `feat` input.
    """

    def __init__(self, n_channels=3, n_classes=2, bilinear=False):
        super(regressor_with_SD_features_seg_vit_c3, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear
        self.norm = nn.LayerNorm(normalized_shape=(64, 64))
        # Collapses [mean UNet feature, 2 attention maps] to 3 channels.
        self.inc_0 = nn.Conv2d(n_channels, 3, kernel_size=3, padding=1)
        # NOTE(review): gpu=True is hard-coded; confirm behavior on
        # CPU-only hosts.
        self.vit_model = CellposeModel(gpu=True, nchan=3, pretrained_model="", use_bfloat16=False)
        self.vit = self.vit_model.net

    def forward(self, img, attn_stack, feature_list):
        # Keep only attention channels 1 and 3 -- presumably the channels
        # tied to the relevant prompt tokens; TODO confirm against caller.
        attn_stack = attn_stack[:, [1,3], ...]
        attn_stack = self.norm(attn_stack)
        unet_feature = feature_list[-1]
        unet_feature_mean = torch.mean(unet_feature, dim=1, keepdim=True)

        x = torch.cat([unet_feature_mean, attn_stack], dim=1)  # 1 x 3 x 64 x 64

        if x.shape[-1] != 512:
            x = F.interpolate(x, size=(512, 512), mode="bilinear")
        x = self.inc_0(x)



        # Cellpose eval runs on CPU numpy arrays and returns an instance
        # label map; this round-trip detaches from the autograd graph.
        out = self.vit_model.eval(img.squeeze().cpu().numpy(), feat=x.squeeze().cpu().numpy())[0]
        if out.dtype == np.uint16:
            # torch.from_numpy cannot consume uint16 arrays.
            out = out.astype(np.int16)
        out = torch.from_numpy(out).unsqueeze(0).to(x.device)
        return out

    def initialize_weights(self):
        # Xavier init for conv/linear weights; zero biases. Not called in
        # __init__ -- invoke explicitly if needed.
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
395
+
396
class regressor_with_SD_features_tra(nn.Module):
    """Tracking head: Cellpose-based segmentation plus an association MLP.

    `forward_seg` segments a frame with Cellpose guided by SD features;
    `forward` embeds paired per-instance attention maps from consecutive
    frames into 320-d vectors for instance association.
    """

    def __init__(self, n_channels=2, n_classes=2, bilinear=False):
        super(regressor_with_SD_features_tra, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear
        self.norm = nn.LayerNorm(normalized_shape=(64, 64))

        # segmentation
        self.inc_0 = nn.Conv2d(3, 3, kernel_size=3, padding=1)
        # NOTE(review): gpu=True is hard-coded; confirm CPU-only behavior.
        self.vit_model = CellposeModel(gpu=True, nchan=3, pretrained_model="", use_bfloat16=False)
        self.vit = self.vit_model.net

        # association head: 2 stacked attention maps -> 1 channel -> MLP
        self.inc_1 = nn.Conv2d(n_channels, 1, kernel_size=3, padding=1)
        self.mlp = nn.Linear(64 * 64, 320)
        # self.vit = self.vit_model.net.float()

    def forward_seg(self, img, attn_stack, feature_list, mask, training=False):
        """Segment one frame. `mask` and `training` are currently unused."""
        # Keep attention channels 1 and 3 only -- TODO confirm which prompt
        # tokens these correspond to.
        attn_stack = attn_stack[:, [1,3], ...]
        attn_stack = self.norm(attn_stack)
        unet_feature = feature_list[-1]
        unet_feature_mean = torch.mean(unet_feature, dim=1, keepdim=True)
        x = torch.cat([unet_feature_mean, attn_stack], dim=1)  # 1 x 3 x 64 x 64

        if x.shape[-1] != 512:
            x = F.interpolate(x, size=(512, 512), mode="bilinear")
        x = self.inc_0(x)
        feat = x

        # Cellpose eval runs on CPU numpy and returns an instance label map;
        # this detaches from the autograd graph.
        out = self.vit_model.eval(img.squeeze().cpu().numpy(), feat=x.squeeze().cpu().numpy())[0]
        if out.dtype == np.uint16:
            # torch.from_numpy cannot consume uint16 arrays.
            out = out.astype(np.int16)
        out = torch.from_numpy(out).unsqueeze(0).to(x.device)
        return out, 0., feat

    def forward(self, attn_prev, feature_list_prev, attn_after, feature_list_after):
        """Embed paired per-instance attention maps for association.

        attn_prev / attn_after: [n_instances, 1, 64, 64] attention maps of
        the same instances in consecutive frames. Returns a
        [1, n_instances, 320] embedding. The feature_list arguments are
        currently unused.
        """
        assert attn_prev.shape == attn_after.shape, "attn_prev and attn_after must have the same shape"
        n_instances = attn_prev.shape[0]
        attn_prev = self.norm(attn_prev)  # [n_instances, 1, 64, 64]
        attn_after = self.norm(attn_after)

        x = torch.cat([attn_prev, attn_after], dim=1)  # n_instances, 2, 64, 64

        x = self.inc_1(x)
        x = x.view(1, n_instances, -1)  # flatten each instance's 64x64 map
        x = self.mlp(x)  # project to a 320-d embedding per instance

        return x  # [1, n_instances, 320]



    def initialize_weights(self):
        # Xavier init for conv/linear weights; zero biases. Not called in
        # __init__ -- invoke explicitly if needed.
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
453
+
454
+
455
+
456
class regressor_with_SD_features_inst_seg_unet(nn.Module):
    """Instance-segmentation head: SD guidance maps + image through a UNet.

    Concatenates the attention-gated mean UNet feature, the attention
    stack, and the raw image, then runs a standard UNet (DoubleConv / Down
    / Up / OutConv from unet_parts) to predict `n_classes` output maps.
    """

    def __init__(self, n_channels=8, n_classes=3, bilinear=False):
        super(regressor_with_SD_features_inst_seg_unet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear
        self.norm = nn.LayerNorm(normalized_shape=(64, 64))
        # Reduces the [img + guidance] stack down to 3 channels for the stem.
        self.inc_0 = (DoubleConv(n_channels, 3))
        self.inc = (DoubleConv(3, 64))
        self.down1 = (Down(64, 128))
        self.down2 = (Down(128, 256))
        self.down3 = (Down(256, 512))
        factor = 2 if bilinear else 1
        self.down4 = (Down(512, 1024 // factor))
        self.up1 = (Up(1024, 512 // factor, bilinear))
        self.up2 = (Up(512, 256 // factor, bilinear))
        self.up3 = (Up(256, 128 // factor, bilinear))
        self.up4 = (Up(128, 64, bilinear))
        self.outc = (OutConv(64, n_classes))

    def forward(self, img, attn_stack, feature_list):
        attn_stack = self.norm(attn_stack)
        unet_feature = feature_list[-1]
        unet_feature_mean = torch.mean(unet_feature, dim=1, keepdim=True)
        attn_stack_mean = torch.mean(attn_stack, dim=1, keepdim=True)
        # Gate the pooled feature with the mean attention response.
        unet_feature_mean = unet_feature_mean * attn_stack_mean
        x = torch.cat([unet_feature_mean, attn_stack], dim=1)
        if x.shape[-1] != 512:
            x = F.interpolate(x, size=(512, 512), mode="bilinear")
        x = torch.cat([img, x], dim=1)  # e.g. [1, 8, 512, 512]
        x = self.inc_0(x)
        # Standard UNet encoder/decoder with skip connections.
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        out = self.outc(x)
        return out

    def initialize_weights(self):
        # Xavier init for conv/linear weights; zero biases. Not called in
        # __init__ -- invoke explicitly if needed.
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
505
+
506
+
507
class regressor_with_SD_features_self(nn.Module):
    """Projects stacked self-attention maps (4096 ch) down to 256 channels.

    Only `self.layer` is used in forward(); layer2-4 and `conv` form a
    decoder that is currently dead code (see the commented-out path below).
    """

    def __init__(self):
        super(regressor_with_SD_features_self, self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(4096, 1024, kernel_size=1, stride=1),
            nn.LeakyReLU(),
            nn.LayerNorm((64, 64)),
            nn.Conv2d(1024, 256, kernel_size=1, stride=1),
            nn.LeakyReLU(),
            nn.LayerNorm((64, 64)),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.ConvTranspose2d(in_channels=128, out_channels=128, kernel_size=4, stride=2, padding=1),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=64, out_channels=64, kernel_size=4, stride=2, padding=1),
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.ConvTranspose2d(in_channels=32, out_channels=32, kernel_size=4, stride=2, padding=1),
        )
        self.conv = nn.Sequential(
            nn.Conv2d(32, 1, kernel_size=1),
            nn.ReLU()
        )
        self.norm = nn.LayerNorm(normalized_shape=(64, 64))
        self.initialize_weights()

    def forward(self, self_attn):
        # Move the attention-channel axis first; assumes the input arrives
        # as [64, 64, 4096] -- TODO confirm against caller.
        self_attn = self_attn.permute(2, 0, 1)
        self_attn = self.layer(self_attn)
        return self_attn
        # Dead code below: the original full decoder path, kept for reference.
        # attn_stack = self.norm(attn_stack)
        # unet_feature = feature_list[-1]
        # attn_stack_mean = torch.mean(attn_stack, dim=1, keepdim=True)
        # unet_feature = unet_feature * attn_stack_mean
        # unet_feature = torch.cat([unet_feature, attn_stack], dim=1)  # [1, 324, 64, 64]
        # x = self.layer(unet_feature)
        # x = self.layer2(x)
        # x = self.layer3(x)
        # x = self.layer4(x)
        # out = self.conv(x)
        # return out / 100

    def initialize_weights(self):
        # Xavier init for conv/linear weights; zero biases.
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
562
+
563
+
564
class regressor_with_SD_features_latent(nn.Module):
    """Projects a 4-channel SD latent map to 256 normalized channels.

    Only `layer` participates in forward(); layer2-4 and `conv` build a
    full decoder that is constructed (and initialized) but currently
    unused.
    """

    def __init__(self):
        super(regressor_with_SD_features_latent, self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(4, 256, kernel_size=1, stride=1),
            nn.LeakyReLU(),
            nn.LayerNorm((64, 64))
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.ConvTranspose2d(in_channels=128, out_channels=128, kernel_size=4, stride=2, padding=1),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=64, out_channels=64, kernel_size=4, stride=2, padding=1),
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(),
            nn.ConvTranspose2d(in_channels=32, out_channels=32, kernel_size=4, stride=2, padding=1),
        )
        self.conv = nn.Sequential(
            nn.Conv2d(32, 1, kernel_size=1),
            nn.ReLU()
        )
        self.norm = nn.LayerNorm(normalized_shape=(64, 64))
        self.initialize_weights()

    def forward(self, self_attn):
        # Single projection step; the decoder layers above are not applied.
        return self.layer(self_attn)

    def initialize_weights(self):
        """Xavier-init conv/linear weights; zero biases."""
        for mod in self.modules():
            if isinstance(mod, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_normal_(mod.weight)
                if mod.bias is not None:
                    nn.init.constant_(mod.bias, 0)
616
+
617
+
618
+
619
+
620
+
621
class regressor_with_deconv(nn.Module):
    """4x upsampling head using learned transposed convolutions.

    Same contract as `regressor1` (4-ch in, 1 non-negative ch out at 4x
    resolution) but upsamples with ConvTranspose2d instead of bilinear
    interpolation.
    """

    def __init__(self):
        super(regressor_with_deconv, self).__init__()
        self.conv1 = nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(4, 1, kernel_size=3, stride=1, padding=1)
        self.deconv1 = nn.ConvTranspose2d(4, 4, kernel_size=4, stride=2, padding=1)
        self.deconv2 = nn.ConvTranspose2d(4, 4, kernel_size=4, stride=2, padding=1)
        self.leaky_relu = nn.LeakyReLU()
        self.relu = nn.ReLU()
        self.initialize_weights()

    def forward(self, x):
        # Two conv -> LeakyReLU -> learned-2x-upsample stages, then a final
        # projection clamped non-negative with ReLU.
        h = self.leaky_relu(self.conv1(x))
        h = self.deconv1(h)
        h = self.leaky_relu(self.conv2(h))
        h = self.deconv2(h)
        return self.relu(self.conv3(h))

    def initialize_weights(self):
        """Xavier-init conv/deconv/linear weights; zero biases."""
        for mod in self.modules():
            if isinstance(mod, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                nn.init.xavier_normal_(mod.weight)
                if mod.bias is not None:
                    nn.init.constant_(mod.bias, 0)
651
+
652
+
653
+
models/seg_post_model/cellpose/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .version import version, version_str
models/seg_post_model/cellpose/__main__.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Copyright © 2025 Howard Hughes Medical Institute, Authored by Carsen Stringer , Michael Rariden and Marius Pachitariu.
3
+ """
4
+ import os, time
5
+ import numpy as np
6
+ from tqdm import tqdm
7
+ from cellpose import utils, models, io, train
8
+ from .version import version_str
9
+ from cellpose.cli import get_arg_parser
10
+
11
+ try:
12
+ from cellpose.gui import gui3d, gui
13
+ GUI_ENABLED = True
14
+ except ImportError as err:
15
+ GUI_ERROR = err
16
+ GUI_ENABLED = False
17
+ GUI_IMPORT = True
18
+ except Exception as err:
19
+ GUI_ENABLED = False
20
+ GUI_ERROR = err
21
+ GUI_IMPORT = False
22
+ raise
23
+
24
+ import logging
25
+
26
+
27
def main():
    """ Run cellpose from command line

    Dispatches to the GUI when no images are given, otherwise to the
    evaluation or training helpers below based on --train.
    """

    args = get_arg_parser().parse_args()  # this has to be in a separate file for autodoc to work

    if args.version:
        print(version_str)
        return

    ######## if no image arguments are provided, run GUI or add model and exit ########
    if len(args.dir) == 0 and len(args.image_path) == 0:
        if args.add_model:
            io.add_model(args.add_model)
            return
        else:
            if not GUI_ENABLED:
                print("GUI ERROR: %s" % GUI_ERROR)
                if GUI_IMPORT:
                    print(
                        "GUI FAILED: GUI dependencies may not be installed, to install, run"
                    )
                    print(" pip install 'cellpose[gui]'")
            else:
                if args.Zstack:
                    gui3d.run()
                else:
                    gui.run()
            return

    ############################## run cellpose on images ##############################
    if args.verbose:
        from .io import logger_setup
        logger, log_file = logger_setup()
    else:
        print(
            ">>>> !LOGGING OFF BY DEFAULT! To see cellpose progress, set --verbose")
        print("No --verbose => no progress or info printed")
        logger = logging.getLogger(__name__)

    # find images
    if len(args.img_filter) > 0:
        image_filter = args.img_filter
    else:
        image_filter = None

    device, gpu = models.assign_device(use_torch=True, gpu=args.use_gpu,
                                       device=args.gpu_device)

    # Falsy/stringly-false pretrained-model values all fall back to cpsam.
    if args.pretrained_model is None or args.pretrained_model == "None" or args.pretrained_model == "False" or args.pretrained_model == "0":
        pretrained_model = "cpsam"
        logger.warning("training from scratch is disabled, using 'cpsam' model")
    else:
        pretrained_model = args.pretrained_model

    # Warn users about old arguments from CP3:
    if args.pretrained_model_ortho:
        logger.warning(
            "the '--pretrained_model_ortho' flag is deprecated in v4.0.1+ and no longer used")
    if args.train_size:
        logger.warning("the '--train_size' flag is deprecated in v4.0.1+ and no longer used")
    if args.chan or args.chan2:
        logger.warning('--chan and --chan2 are deprecated, all channels are used by default')
    if args.all_channels:
        logger.warning("the '--all_channels' flag is deprecated in v4.0.1+ and no longer used")
    if args.restore_type:
        logger.warning("the '--restore_type' flag is deprecated in v4.0.1+ and no longer used")
    if args.transformer:
        logger.warning("the '--tranformer' flag is deprecated in v4.0.1+ and no longer used")
    if args.invert:
        logger.warning("the '--invert' flag is deprecated in v4.0.1+ and no longer used")
    if args.chan2_restore:
        logger.warning("the '--chan2_restore' flag is deprecated in v4.0.1+ and no longer used")
    if args.diam_mean:
        logger.warning("the '--diam_mean' flag is deprecated in v4.0.1+ and no longer used")
    if args.train_size:
        logger.warning("the '--train_size' flag is deprecated in v4.0.1+ and no longer used")

    # Normalization is either percentile-based or a simple on/off flag.
    if args.norm_percentile is not None:
        value1, value2 = args.norm_percentile
        normalize = {'percentile': (float(value1), float(value2))}
    else:
        normalize = (not args.no_norm)

    if args.save_each:
        if not args.save_every:
            raise ValueError("ERROR: --save_each requires --save_every")

    if len(args.image_path) > 0 and args.train:
        raise ValueError("ERROR: cannot train model with single image input")

    ## Run evaluation on images
    if not args.train:
        _evaluate_cellposemodel_cli(args, logger, image_filter, device, pretrained_model, normalize)

    ## Train a model ##
    else:
        _train_cellposemodel_cli(args, logger, image_filter, device, pretrained_model, normalize)
127
+
128
def _train_cellposemodel_cli(args, logger, image_filter, device, pretrained_model, normalize):
    """Train a CellposeModel from CLI args and return the trained model.

    Data comes either from an .npy file-list dict (args.file_list) or from
    image directories (args.dir / args.test_dir).
    """
    test_dir = None if len(args.test_dir) == 0 else args.test_dir
    images, labels, image_names, train_probs = None, None, None, None
    test_images, test_labels, image_names_test, test_probs = None, None, None, None
    compute_flows = False
    if len(args.file_list) > 0:
        if os.path.exists(args.file_list):
            dat = np.load(args.file_list, allow_pickle=True).item()
            image_names = dat["train_files"]
            image_names_test = dat.get("test_files", None)
            train_probs = dat.get("train_probs", None)
            test_probs = dat.get("test_probs", None)
            compute_flows = dat.get("compute_flows", False)
            load_files = False
        else:
            # NOTE(review): execution continues past this message with
            # `load_files` unassigned, which raises UnboundLocalError at the
            # train_seg call below -- consider raising here instead.
            logger.critical(f"ERROR: {args.file_list} does not exist")
    else:
        output = io.load_train_test_data(args.dir, test_dir, image_filter,
                                         args.mask_filter,
                                         args.look_one_level_down)
        images, labels, image_names, test_images, test_labels, image_names_test = output
        load_files = True

    # initialize model
    model = models.CellposeModel(device=device, pretrained_model=pretrained_model)

    # train segmentation model
    cpmodel_path = train.train_seg(
        model.net, images, labels, train_files=image_names,
        test_data=test_images, test_labels=test_labels,
        test_files=image_names_test, train_probs=train_probs,
        test_probs=test_probs, compute_flows=compute_flows,
        load_files=load_files, normalize=normalize,
        channel_axis=args.channel_axis,
        learning_rate=args.learning_rate, weight_decay=args.weight_decay,
        SGD=args.SGD, n_epochs=args.n_epochs, batch_size=args.train_batch_size,
        min_train_masks=args.min_train_masks,
        nimg_per_epoch=args.nimg_per_epoch,
        nimg_test_per_epoch=args.nimg_test_per_epoch,
        save_path=os.path.realpath(args.dir),
        save_every=args.save_every,
        save_each=args.save_each,
        model_name=args.model_name_out)[0]
    model.pretrained_model = cpmodel_path
    logger.info(">>>> model trained and saved to %s" % cpmodel_path)
    return model
174
+
175
+
176
def _evaluate_cellposemodel_cli(args, logger, imf, device, pretrained_model, normalize):
    """Run Cellpose inference over CLI-selected images and save outputs."""
    # Check with user if they REALLY mean to run without saving anything.
    # NOTE(review): `saving_something` is only assigned under this guard;
    # the caller only invokes this function when `not args.train`, so the
    # later reference is safe -- but the guard itself is redundant.
    if not args.train:
        saving_something = args.save_png or args.save_tif or args.save_flows or args.save_txt

    tic = time.time()
    # Collect image paths: a whole directory, or a single --image_path.
    if len(args.dir) > 0:
        image_names = io.get_image_files(
            args.dir, args.mask_filter, imf=imf,
            look_one_level_down=args.look_one_level_down)
    else:
        if os.path.exists(args.image_path):
            image_names = [args.image_path]
        else:
            raise ValueError(f"ERROR: no file found at {args.image_path}")
    nimg = len(image_names)

    if args.savedir:
        if not os.path.exists(args.savedir):
            raise FileExistsError(f"--savedir {args.savedir} does not exist")

    logger.info(
        ">>>> running cellpose on %d images using all channels" % nimg)

    # handle built-in model exceptions
    model = models.CellposeModel(device=device, pretrained_model=pretrained_model,)

    tqdm_out = utils.TqdmToLogger(logger, level=logging.INFO)

    channel_axis = args.channel_axis
    z_axis = args.z_axis

    for image_name in tqdm(image_names, file=tqdm_out):
        if args.do_3D or args.stitch_threshold > 0.:
            logger.info('loading image as 3D zstack')
            image = io.imread_3D(image_name)
            # NOTE(review): these defaults persist across loop iterations
            # once set, since channel_axis/z_axis are reassigned here.
            if channel_axis is None:
                channel_axis = 3
            if z_axis is None:
                z_axis = 0

        else:
            image = io.imread_2D(image_name)
        out = model.eval(
            image,
            diameter=args.diameter,
            do_3D=args.do_3D,
            augment=args.augment,
            flow_threshold=args.flow_threshold,
            cellprob_threshold=args.cellprob_threshold,
            stitch_threshold=args.stitch_threshold,
            min_size=args.min_size,
            batch_size=args.batch_size,
            bsize=args.bsize,
            resample=not args.no_resample,
            normalize=normalize,
            channel_axis=channel_axis,
            z_axis=z_axis,
            anisotropy=args.anisotropy,
            niter=args.niter,
            flow3D_smooth=args.flow3D_smooth)
        masks, flows = out[:2]

        if args.exclude_on_edges:
            masks = utils.remove_edge_masks(masks)
        if not args.no_npy:
            io.masks_flows_to_seg(image, masks, flows, image_name,
                                  imgs_restore=None,
                                  restore_type=None,
                                  ratio=1.)
        if saving_something:
            suffix = "_cp_masks"
            if args.output_name is not None:
                # (1) If `savedir` is not defined, then must have a non-zero `suffix`
                if args.savedir is None and len(args.output_name) > 0:
                    suffix = args.output_name
                elif args.savedir is not None and not os.path.samefile(args.savedir, args.dir):
                    # (2) If `savedir` is defined, and different from `dir` then
                    # takes the value passed as a param. (which can be empty string)
                    suffix = args.output_name

            io.save_masks(image, masks, flows, image_name,
                          suffix=suffix, png=args.save_png,
                          tif=args.save_tif, save_flows=args.save_flows,
                          save_outlines=args.save_outlines,
                          dir_above=args.dir_above, savedir=args.savedir,
                          save_txt=args.save_txt, in_folders=args.in_folders,
                          save_mpl=args.save_mpl)
        if args.save_rois:
            io.save_rois(masks, image_name)
    logger.info(">>>> completed in %0.3f sec" % (time.time() - tic))

    return model
269
+
270
+
271
+ if __name__ == "__main__":
272
+ main()
models/seg_post_model/cellpose/cli.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Copyright © 2023 Howard Hughes Medical Institute, Authored by Carsen Stringer and Marius Pachitariu and Michael Rariden.
3
+ """
4
+
5
+ import argparse
6
+
7
+
8
def get_arg_parser():
    """Parse command line arguments for the cellpose main function.

    Note: this function has to be in a separate file to allow autodoc to work for CLI.
    The autodoc_mock_imports in conf.py does not work for sphinx-argparse sometimes,
    see https://github.com/ashb/sphinx-argparse/issues/9#issue-1097057823

    Returns:
        argparse.ArgumentParser: parser with misc, hardware, input, model,
        algorithm, output, and training argument groups.
    """
    parser = argparse.ArgumentParser(description="Cellpose Command Line Parameters")

    # misc settings
    parser.add_argument("--version", action="store_true",
                        help="show cellpose version info")
    parser.add_argument(
        "--verbose", action="store_true",
        help="show information about running and settings and save to log")
    parser.add_argument("--Zstack", action="store_true", help="run GUI in 3D mode")

    # settings for CPU vs GPU
    hardware_args = parser.add_argument_group("Hardware Arguments")
    hardware_args.add_argument("--use_gpu", action="store_true",
                               help="use gpu if torch with cuda installed")
    hardware_args.add_argument(
        "--gpu_device", required=False, default="0", type=str,
        help="which gpu device to use, use an integer for torch, or mps for M1")

    # settings for locating and formatting images
    input_img_args = parser.add_argument_group("Input Image Arguments")
    input_img_args.add_argument("--dir", default=[], type=str,
                                help="folder containing data to run or train on.")
    input_img_args.add_argument(
        "--image_path", default=[], type=str, help=
        "if given and --dir not given, run on single image instead of folder (cannot train with this option)"
    )
    input_img_args.add_argument(
        "--look_one_level_down", action="store_true",
        help="run processing on all subdirectories of current folder")
    input_img_args.add_argument("--img_filter", default=[], type=str,
                                help="end string for images to run on")
    input_img_args.add_argument(
        "--channel_axis", default=None, type=int,
        help="axis of image which corresponds to image channels")
    input_img_args.add_argument("--z_axis", default=None, type=int,
                                help="axis of image which corresponds to Z dimension")

    # TODO: remove deprecated in future version
    input_img_args.add_argument(
        "--chan", default=0, type=int, help=
        "Deprecated in v4.0.1+, not used. ")
    input_img_args.add_argument(
        "--chan2", default=0, type=int, help=
        'Deprecated in v4.0.1+, not used. ')
    input_img_args.add_argument("--invert", action="store_true", help=
                                'Deprecated in v4.0.1+, not used. ')
    input_img_args.add_argument(
        "--all_channels", action="store_true", help=
        'Deprecated in v4.0.1+, not used. ')

    # model settings
    model_args = parser.add_argument_group("Model Arguments")
    model_args.add_argument("--pretrained_model", required=False, default="cpsam",
                            type=str,
                            help="model to use for running or starting training")
    model_args.add_argument(
        "--add_model", required=False, default=None, type=str,
        help="model path to copy model to hidden .cellpose folder for using in GUI/CLI")
    model_args.add_argument("--pretrained_model_ortho", required=False, default=None,
                            type=str,
                            help="Deprecated in v4.0.1+, not used. ")

    # TODO: remove deprecated in future version
    model_args.add_argument("--restore_type", required=False, default=None, type=str, help=
                            'Deprecated in v4.0.1+, not used. ')
    model_args.add_argument("--chan2_restore", action="store_true", help=
                            'Deprecated in v4.0.1+, not used. ')
    model_args.add_argument(
        "--transformer", action="store_true", help=
        "use transformer backbone (pretrained_model from Cellpose3 is transformer_cp3)")

    # algorithm settings
    algorithm_args = parser.add_argument_group("Algorithm Arguments")
    algorithm_args.add_argument("--no_norm", action="store_true",
                                help="do not normalize images (normalize=False)")
    # NOTE(review): no type= given, so the two values are parsed as strings;
    # downstream code presumably converts to float — confirm before adding type=float.
    algorithm_args.add_argument(
        '--norm_percentile',
        nargs=2,  # Require exactly two values
        metavar=('VALUE1', 'VALUE2'),
        help="Provide two float values to set norm_percentile (e.g., --norm_percentile 1 99)"
    )
    # fixed: help string was missing the closing parenthesis
    algorithm_args.add_argument(
        "--do_3D", action="store_true",
        help="process images as 3D stacks of images (nplanes x nchan x Ly x Lx)")
    algorithm_args.add_argument(
        "--diameter", required=False, default=None, type=float, help=
        "use to resize cells to the training diameter (30 pixels)"
    )
    algorithm_args.add_argument(
        "--stitch_threshold", required=False, default=0.0, type=float,
        help="compute masks in 2D then stitch together masks with IoU>0.9 across planes"
    )
    algorithm_args.add_argument(
        "--min_size", required=False, default=15, type=int,
        help="minimum number of pixels per mask, can turn off with -1")
    algorithm_args.add_argument(
        "--flow3D_smooth", required=False, default=0, type=float,
        help="stddev of gaussian for smoothing of dP for dynamics in 3D, default of 0 means no smoothing")
    algorithm_args.add_argument(
        "--flow_threshold", default=0.4, type=float, help=
        "flow error threshold, 0 turns off this optional QC step. Default: %(default)s")
    algorithm_args.add_argument(
        "--cellprob_threshold", default=0, type=float,
        help="cellprob threshold, default is 0, decrease to find more and larger masks")
    algorithm_args.add_argument(
        "--niter", default=0, type=int, help=
        "niter, number of iterations for dynamics for mask creation, default of 0 means it is proportional to diameter, set to a larger number like 2000 for very long ROIs"
    )
    algorithm_args.add_argument("--anisotropy", required=False, default=1.0, type=float,
                                help="anisotropy of volume in 3D")
    algorithm_args.add_argument("--exclude_on_edges", action="store_true",
                                help="discard masks which touch edges of image")
    algorithm_args.add_argument(
        "--augment", action="store_true",
        help="tiles image with overlapping tiles and flips overlapped regions to augment"
    )
    algorithm_args.add_argument("--batch_size", default=8, type=int,
                                help="inference batch size. Default: %(default)s")

    # TODO: remove deprecated in future version
    algorithm_args.add_argument(
        "--no_resample", action="store_true",
        help="disables flows/cellprob resampling to original image size before computing masks. Using this flag will make more masks more jagged with larger diameter settings.")
    algorithm_args.add_argument(
        "--no_interp", action="store_true",
        help="do not interpolate when running dynamics (was default)")

    # output settings
    output_args = parser.add_argument_group("Output Arguments")
    output_args.add_argument(
        "--save_png", action="store_true",
        help="save masks as png")
    output_args.add_argument(
        "--save_tif", action="store_true",
        help="save masks as tif")
    output_args.add_argument(
        "--output_name", default=None, type=str,
        help="suffix for saved masks, default is _cp_masks, can be empty if `savedir` used and different of `dir`")
    output_args.add_argument("--no_npy", action="store_true",
                             help="suppress saving of npy")
    output_args.add_argument(
        "--savedir", default=None, type=str, help=
        "folder to which segmentation results will be saved (defaults to input image directory)"
    )
    output_args.add_argument(
        "--dir_above", action="store_true", help=
        "save output folders adjacent to image folder instead of inside it (off by default)"
    )
    output_args.add_argument("--in_folders", action="store_true",
                             help="flag to save output in folders (off by default)")
    output_args.add_argument(
        "--save_flows", action="store_true", help=
        "whether or not to save RGB images of flows when masks are saved (disabled by default)"
    )
    output_args.add_argument(
        "--save_outlines", action="store_true", help=
        "whether or not to save RGB outline images when masks are saved (disabled by default)"
    )
    output_args.add_argument(
        "--save_rois", action="store_true",
        help="whether or not to save ImageJ compatible ROI archive (disabled by default)"
    )
    output_args.add_argument(
        "--save_txt", action="store_true",
        help="flag to enable txt outlines for ImageJ (disabled by default)")
    output_args.add_argument(
        "--save_mpl", action="store_true",
        help="save a figure of image/mask/flows using matplotlib (disabled by default). "
        "This is slow, especially with large images.")

    # training settings
    training_args = parser.add_argument_group("Training Arguments")
    training_args.add_argument("--train", action="store_true",
                               help="train network using images in dir")
    training_args.add_argument("--test_dir", default=[], type=str,
                               help="folder containing test data (optional)")
    training_args.add_argument(
        "--file_list", default=[], type=str, help=
        "path to list of files for training and testing and probabilities for each image (optional)"
    )
    training_args.add_argument(
        "--mask_filter", default="_masks", type=str, help=
        "end string for masks to run on. use '_seg.npy' for manual annotations from the GUI. Default: %(default)s"
    )
    training_args.add_argument("--learning_rate", default=1e-5, type=float,
                               help="learning rate. Default: %(default)s")
    training_args.add_argument("--weight_decay", default=0.1, type=float,
                               help="weight decay. Default: %(default)s")
    training_args.add_argument("--n_epochs", default=100, type=int,
                               help="number of epochs. Default: %(default)s")
    training_args.add_argument("--train_batch_size", default=1, type=int,
                               help="training batch size. Default: %(default)s")
    training_args.add_argument("--bsize", default=256, type=int,
                               help="block size for tiles. Default: %(default)s")
    training_args.add_argument(
        "--nimg_per_epoch", default=None, type=int,
        help="number of train images per epoch. Default is to use all train images.")
    training_args.add_argument(
        "--nimg_test_per_epoch", default=None, type=int,
        help="number of test images per epoch. Default is to use all test images.")
    training_args.add_argument(
        "--min_train_masks", default=5, type=int, help=
        "minimum number of masks a training image must have to be used. Default: %(default)s"
    )
    training_args.add_argument("--SGD", default=0, type=int,
                               help="Deprecated in v4.0.1+, not used - AdamW used instead. ")
    training_args.add_argument(
        "--save_every", default=100, type=int,
        help="number of epochs to skip between saves. Default: %(default)s")
    # fixed: typo "wether" -> "whether" in help text
    training_args.add_argument(
        "--save_each", action="store_true",
        help="whether or not to save each epoch. Must also use --save_every. (default: False)")
    training_args.add_argument(
        "--model_name_out", default=None, type=str,
        help="Name of model to save as, defaults to name describing model architecture. "
        "Model is saved in the folder specified by --dir in models subfolder.")

    # TODO: remove deprecated in future version
    training_args.add_argument(
        "--diam_mean", default=30., type=float, help=
        'Deprecated in v4.0.1+, not used. ')
    training_args.add_argument("--train_size", action="store_true", help=
                               'Deprecated in v4.0.1+, not used. ')

    return parser
models/seg_post_model/cellpose/core.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Copyright © 2025 Howard Hughes Medical Institute, Authored by Carsen Stringer , Michael Rariden and Marius Pachitariu.
3
+ """
4
+ import logging
5
+ import numpy as np
6
+ from tqdm import trange
7
+ from . import transforms, utils
8
+
9
+ import torch
10
+
11
+ TORCH_ENABLED = True
12
+
13
+ core_logger = logging.getLogger(__name__)
14
+ tqdm_out = utils.TqdmToLogger(core_logger, level=logging.INFO)
15
+
16
+
17
def use_gpu(gpu_number=0, use_torch=True):
    """Return True if a GPU (CUDA or MPS) is available through PyTorch.

    Args:
        gpu_number (int): Index of the GPU to probe. Defaults to 0.
        use_torch (bool): Must be True; cellpose is torch-only. Defaults to True.

    Returns:
        bool: True if a working GPU was found, False otherwise.

    Raises:
        ValueError: If use_torch is False (cellpose only runs with PyTorch now).
    """
    # guard clause instead of if/else: torch is the only supported backend
    if not use_torch:
        raise ValueError("cellpose only runs with PyTorch now")
    return _use_gpu_torch(gpu_number)
35
+
36
+
37
def _use_gpu_torch(gpu_number=0):
    """
    Checks if CUDA or MPS is available and working with PyTorch.

    Probes each backend by allocating a tiny tensor on it; any failure is
    treated as "not available" and the next backend is tried.

    Args:
        gpu_number (int): The GPU device number to use (default is 0).

    Returns:
        bool: True if CUDA or MPS is available and working, False otherwise.
    """
    try:
        device = torch.device("cuda:" + str(gpu_number))
        _ = torch.zeros((1, 1)).to(device)
        core_logger.info("** TORCH CUDA version installed and working. **")
        return True
    except Exception:
        # fixed: was a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit; fall through to the MPS probe
        pass
    try:
        device = torch.device('mps:' + str(gpu_number))
        _ = torch.zeros((1, 1)).to(device)
        core_logger.info('** TORCH MPS version installed and working. **')
        return True
    except Exception:
        # fixed: log message had a double negative ("Neither ... not installed")
        core_logger.info('Neither TORCH CUDA nor MPS version installed/working.')
        return False
62
+
63
+
64
def assign_device(use_torch=True, gpu=False, device=0):
    """
    Assigns the device (CPU or GPU or mps) to be used for computation.

    Args:
        use_torch (bool, optional): Whether to use torch for GPU detection. Defaults to True.
        gpu (bool, optional): Whether to use GPU for computation. Defaults to False.
        device (int or str, optional): The device index or name to be used. Defaults to 0.

    Returns:
        torch.device, bool (True if GPU is used, False otherwise)
    """
    if isinstance(device, str):
        # "mps" is only honored when a GPU was requested and MPS is available;
        # any other string is interpreted as a CUDA device index.
        if device != "mps" or not (gpu and torch.backends.mps.is_available()):
            device = int(device)
    # fixed: `cpu` could previously be referenced before assignment (NameError)
    # when the GPU branch ran but neither backend check assigned it
    cpu = True
    if gpu and use_gpu(use_torch=True):
        try:
            if torch.cuda.is_available():
                device = torch.device(f'cuda:{device}')
                core_logger.info(">>>> using GPU (CUDA)")
                gpu = True
                cpu = False
        except Exception:
            pass
        if cpu:
            # fixed: the MPS probe previously ran unconditionally and could
            # silently override an already-selected CUDA device
            try:
                if torch.backends.mps.is_available():
                    device = torch.device('mps')
                    core_logger.info(">>>> using GPU (MPS)")
                    gpu = True
                    cpu = False
            except Exception:
                pass
    if cpu:
        device = torch.device("cpu")
        core_logger.info(">>>> using CPU")
        gpu = False
    return device, gpu
110
+
111
+
112
def _to_device(x, device, dtype=torch.float32):
    """
    Move *x* onto *device* as a torch tensor.

    A numpy array is converted to a tensor of the given dtype on the target
    device; an input that is already a tensor is returned unchanged (its
    device and dtype are NOT modified).

    Args:
        x (torch.Tensor or numpy.ndarray): The input tensor or numpy array.
        device (torch.device): The target device.
        dtype (torch.dtype): dtype used when converting from numpy.

    Returns:
        torch.Tensor: The tensor on the specified device (or the original tensor).
    """
    if isinstance(x, torch.Tensor):
        return x
    return torch.from_numpy(x).to(device, dtype=dtype)
128
+
129
+
130
def _from_device(X):
    """
    Copy a PyTorch tensor back to the CPU as a float32 NumPy array.

    Args:
        X (torch.Tensor): The input PyTorch tensor.

    Returns:
        numpy.ndarray: The converted float32 NumPy array.
    """
    # cast to float32 before .numpy() so the conversion always works
    # (e.g. for half/bfloat16 tensors)
    return X.detach().cpu().to(torch.float32).numpy()
143
+
144
+
145
def _forward(net, x, feat=None):
    """Convert images to torch tensors, run the network, and return numpy arrays.

    Args:
        net (torch.nn.Module): The network model (must expose .device and .dtype).
        x (numpy.ndarray): The input images.
        feat: Optional extra feature input passed through to the net.

    Returns:
        Tuple[numpy.ndarray, numpy.ndarray]: The output predictions
        (flows and cellprob) and style features.
    """
    inputs = _to_device(x, device=net.device, dtype=net.dtype)
    features = None if feat is None else _to_device(feat, device=net.device, dtype=net.dtype)
    net.eval()
    with torch.no_grad():
        # only the first two outputs of the net are used
        y, style = net(inputs, feat=features)[:2]
    del inputs
    return _from_device(y), _from_device(style)
165
+
166
+
167
def run_net(net, imgi, feat=None, batch_size=8, augment=False, tile_overlap=0.1, bsize=224,
            rsz=None):
    """
    Run network on stack of images.

    (faster if augment is False)

    Args:
        net (class): cellpose network (model.net)
        imgi (np.ndarray): The input image or stack of images of size [Lz x Ly x Lx x nchan].
        feat (np.ndarray, optional): extra per-pixel features tiled alongside the image
            and forwarded to the net. Defaults to None.
            NOTE(review): allocated with `nchan` channels like the image — confirm feat
            actually has the same channel count as imgi.
        batch_size (int, optional): Number of tiles to run in a batch. Defaults to 8.
        rsz (float, optional): Resize coefficient(s) for image. Defaults to 1.0.
        augment (bool, optional): Tiles image with overlapping tiles and flips overlapped regions to augment. Defaults to False.
        tile_overlap (float, optional): Fraction of overlap of tiles when computing flows. Defaults to 0.1.
        bsize (int, optional): Size of tiles to use in pixels [bsize x bsize]. Defaults to 224.

    Returns:
        Tuple[numpy.ndarray, numpy.ndarray]: outputs of network y and style. If tiled `y` is averaged in tile overlaps. Size of [Ly x Lx x 3] or [Lz x Ly x Lx x 3].
        y[...,0] is Y flow; y[...,1] is X flow; y[...,2] is cell probability.
        style is a 1D array of size 256 summarizing the style of the image, if tiled `style` is averaged over tiles.
    """
    # run network
    Lz, Ly0, Lx0, nchan = imgi.shape
    if rsz is not None:
        # scalar resize factor applies to both spatial dims
        if not isinstance(rsz, list) and not isinstance(rsz, np.ndarray):
            rsz = [rsz, rsz]
        Lyr, Lxr = int(Ly0 * rsz[0]), int(Lx0 * rsz[1])
    else:
        Lyr, Lxr = Ly0, Lx0  # 512, 512

    ly, lx = bsize, bsize  # 256, 256
    # pad so the (possibly resized) image is at least one tile in each dim
    ypad1, ypad2, xpad1, xpad2 = transforms.get_pad_yx(Lyr, Lxr, min_size=(bsize, bsize))  # 8
    Ly, Lx = Lyr + ypad1 + ypad2, Lxr + xpad1 + xpad2  # 528, 528
    pads = np.array([[0, 0], [ypad1, ypad2], [xpad1, xpad2]])

    # number of tiles per axis; augment mode uses denser tiling
    if augment:
        ny = max(2, int(np.ceil(2. * Ly / bsize)))
        nx = max(2, int(np.ceil(2. * Lx / bsize)))
    else:
        ny = 1 if Ly <= bsize else int(np.ceil((1. + 2 * tile_overlap) * Ly / bsize))  # 3
        nx = 1 if Lx <= bsize else int(np.ceil((1. + 2 * tile_overlap) * Lx / bsize))  # 3


    # run multiple slices at the same time
    ntiles = ny * nx
    nimgs = max(1, batch_size // ntiles)  # number of imgs to run in the same batch, 1
    niter = int(np.ceil(Lz / nimgs))  # 1
    # progress bar only for multi-plane / long jobs
    ziterator = (trange(niter, file=tqdm_out, mininterval=30)
                 if niter > 10 or Lz > 1 else range(niter))
    for k in ziterator:
        inds = np.arange(k * nimgs, min(Lz, (k + 1) * nimgs))
        # buffer holding all tiles for this group of z-planes
        IMGa = np.zeros((ntiles * len(inds), nchan, ly, lx), "float32")  # 9, 3, 256, 256
        if feat is not None:
            FEATa = np.zeros((ntiles * len(inds), nchan, ly, lx), "float32")  # 9, 256
        else:
            FEATa = None
        for i, b in enumerate(inds):
            # pad image for net so Ly and Lx are divisible by 4
            imgb = transforms.resize_image(imgi[b], rsz=rsz) if rsz is not None else imgi[b].copy()
            imgb = np.pad(imgb.transpose(2, 0, 1), pads, mode="constant")  # 3, 528, 528

            IMG, ysub, xsub, Lyt, Lxt = transforms.make_tiles(
                imgb, bsize=bsize, augment=augment,
                tile_overlap=tile_overlap)  # IMG: 3, 3, 3, 256, 256
            IMGa[i * ntiles : (i+1) * ntiles] = np.reshape(IMG,
                                                           (ny * nx, nchan, ly, lx))
            if feat is not None:
                # tile the extra features exactly the same way as the image
                featb = transforms.resize_image(feat[b], rsz=rsz) if rsz is not None else feat[b].copy()
                featb = np.pad(featb.transpose(2, 0, 1), pads, mode="constant")
                FEAT, ysub, xsub, Lyt, Lxt = transforms.make_tiles(
                    featb, bsize=bsize, augment=augment,
                    tile_overlap=tile_overlap)
                FEATa[i * ntiles : (i+1) * ntiles] = np.reshape(FEAT,
                                                                (ny * nx, nchan, ly, lx))

        # run network
        for j in range(0, IMGa.shape[0], batch_size):
            bslc = slice(j, min(j + batch_size, IMGa.shape[0]))
            ya0, stylea0 = _forward(net, IMGa[bslc], feat=FEATa[bslc] if FEATa is not None else None)
            if j == 0:
                # output buffers allocated lazily once the number of output
                # channels (nout) is known from the first forward pass
                nout = ya0.shape[1]
                ya = np.zeros((IMGa.shape[0], nout, ly, lx), "float32")
                stylea = np.zeros((IMGa.shape[0], 256), "float32")
            ya[bslc] = ya0
            stylea[bslc] = stylea0

        # average tiles
        for i, b in enumerate(inds):
            if i == 0 and k == 0:
                yf = np.zeros((Lz, nout, Ly, Lx), "float32")
                styles = np.zeros((Lz, 256), "float32")
            y = ya[i * ntiles : (i + 1) * ntiles]
            if augment:
                # undo the flips applied to overlapped regions before averaging
                y = np.reshape(y, (ny, nx, 3, ly, lx))
                y = transforms.unaugment_tiles(y)
                y = np.reshape(y, (-1, 3, ly, lx))
            yfi = transforms.average_tiles(y, ysub, xsub, Lyt, Lxt)
            yf[b] = yfi[:, :imgb.shape[-2], :imgb.shape[-1]]
            # L2-normalize the per-plane style vector (summed over tiles)
            stylei = stylea[i * ntiles:(i + 1) * ntiles].sum(axis=0)
            stylei /= (stylei**2).sum()**0.5
            styles[b] = stylei
    # slices from padding
    yf = yf[:, :, ypad1 : Ly-ypad2, xpad1 : Lx-xpad2]
    yf = yf.transpose(0, 2, 3, 1)
    return yf, np.array(styles)
272
+
273
+
274
def run_3D(net, imgs, batch_size=8, augment=False,
           tile_overlap=0.1, bsize=224, net_ortho=None,
           progress=None):
    """
    Run network on image z-stack, accumulating flows from the three
    orthogonal orientations (YX, ZY, ZX).

    (faster if augment is False)

    Args:
        net (class): cellpose network (model.net).
        imgs (np.ndarray): The input image stack of size [Lz x Ly x Lx x nchan].
        batch_size (int, optional): Number of tiles to run in a batch. Defaults to 8.
        augment (bool, optional): Tiles image with overlapping tiles and flips overlapped regions to augment. Defaults to False.
        tile_overlap (float, optional): Fraction of overlap of tiles when computing flows. Defaults to 0.1.
        bsize (int, optional): Size of tiles to use in pixels [bsize x bsize]. Defaults to 224.
        net_ortho (class, optional): cellpose network for orthogonal ZY and ZX planes.
            Defaults to None. NOTE(review): accepted but never used in this body —
            all three orientations run through `net`.
        progress (QProgressBar, optional): pyqt progress bar. Defaults to None.

    Returns:
        Tuple[numpy.ndarray, numpy.ndarray]: outputs of network yf and style.
        yf has size [Lz x Ly x Lx x 4]: yf[...,0] is Z flow; yf[...,1] is Y flow;
        yf[...,2] is X flow; yf[...,3] is cell probability (each summed over the
        orientations that predict it).
        style is the style array returned by the LAST orientation pass only (ZX).
    """
    sstr = ["YX", "ZY", "ZX"]
    # pm[p]: axis permutation putting orientation p's plane axes last (plus channels);
    # ipm[p]: inverse permutation used to map results back to (Z, Y, X) order
    pm = [(0, 1, 2, 3), (1, 0, 2, 3), (2, 0, 1, 3)]
    ipm = [(0, 1, 2), (1, 0, 2), (1, 2, 0)]
    # cp[p]: which two global flow channels orientation p contributes to;
    # cpy[p]: which two of the network's output channels provide them
    cp = [(1, 2), (0, 2), (0, 1)]
    cpy = [(0, 1), (0, 1), (0, 1)]
    shape = imgs.shape[:-1]
    yf = np.zeros((*shape, 4), "float32")
    for p in range(3):
        xsl = imgs.transpose(pm[p])
        # per image
        core_logger.info("running %s: %d planes of size (%d, %d)" %
                         (sstr[p], shape[pm[p][0]], shape[pm[p][1]], shape[pm[p][2]]))
        y, style = run_net(net,
                           xsl, batch_size=batch_size, augment=augment,
                           bsize=bsize, tile_overlap=tile_overlap,
                           rsz=None)
        # accumulate cellprob (last channel) and the two flow components
        yf[..., -1] += y[..., -1].transpose(ipm[p])
        for j in range(2):
            yf[..., cp[p][j]] += y[..., cpy[p][j]].transpose(ipm[p])
        y = None; del y  # free the per-orientation output before the next pass

        if progress is not None:
            progress.setValue(25 + 15 * p)

    return yf, style
models/seg_post_model/cellpose/denoise.py ADDED
@@ -0,0 +1,1474 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Copyright © 2025 Howard Hughes Medical Institute, Authored by Carsen Stringer , Michael Rariden and Marius Pachitariu.
3
+ """
4
+ import os, time, datetime
5
+ import numpy as np
6
+ from scipy.stats import mode
7
+ import cv2
8
+ import torch
9
+ from torch import nn
10
+ from torch.nn.functional import conv2d, interpolate
11
+ from tqdm import trange
12
+ from pathlib import Path
13
+
14
+ import logging
15
+
16
+ denoise_logger = logging.getLogger(__name__)
17
+
18
+ from cellpose import transforms, utils, io
19
+ from cellpose.core import run_net
20
+ from cellpose.models import CellposeModel, model_path, normalize_default, assign_device
21
+
22
# Catalog of restoration model names: each restoration type x cell type, plus
# per/seg/rec loss variants and an anisotropy model for the pre-cyto3 nets.
MODEL_NAMES = []
for _cell in ["cyto3", "cyto2", "nuclei"]:
    for _restore in ["denoise", "deblur", "upsample", "oneclick"]:
        MODEL_NAMES.append(f"{_restore}_{_cell}")
        if _cell != "cyto3":
            MODEL_NAMES.extend(f"{_restore}_{_loss}_{_cell}"
                               for _loss in ["per", "seg", "rec"])
    if _cell != "cyto3":
        MODEL_NAMES.append(f"aniso_{_cell}")

# shared loss criteria: MSE for reconstruction, BCE-with-logits for cellprob
criterion = nn.MSELoss(reduction="mean")
criterion2 = nn.BCEWithLogitsLoss(reduction="mean")
34
+
35
+
36
def deterministic(seed=0):
    """Seed all RNGs (numpy, python random, torch CPU/CUDA) to create test data
    reproducibly, and force deterministic cuDNN behavior."""
    import random
    np.random.seed(seed)      # Numpy module.
    random.seed(seed)         # Python random module.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
47
+
48
+
49
def loss_fn_rec(lbl, y):
    """Reconstruction loss between true labels lbl and prediction y:
    mean-squared error scaled by a fixed factor of 80."""
    return 80. * criterion(y, lbl)
53
+
54
+
55
def loss_fn_seg(lbl, y):
    """Segmentation loss between true labels lbl and prediction y.

    Combines halved MSE on the (5x-scaled) flow-field channels with a
    BCE-with-logits loss on the cell-probability channel.
    """
    flow_target = 5. * lbl[:, 1:]
    cellprob_target = (lbl[:, 0] > .5).float()
    flow_loss = criterion(y[:, :2], flow_target) / 2.
    prob_loss = criterion2(y[:, 2], cellprob_target)
    return flow_loss + prob_loss
64
+
65
+
66
def get_sigma(Tdown):
    """ Calculates the correlation matrices across channels for the perceptual loss.

    Args:
        Tdown (list): List of tensors output by each downsampling block of network.

    Returns:
        list: List of channel-correlation matrices (one per input tensor),
        each of shape [batch x nchan x nchan].
    """
    Sigma = []
    for t in Tdown:
        # standardize each channel map over its spatial dimensions
        z = t - t.mean((-2, -1), keepdim=True)
        z = z / z.std((-2, -1), keepdim=True)
        # channel-by-channel inner product, averaged over spatial positions
        Sigma.append(torch.einsum("bnxy, bmxy -> bnm", z, z) /
                     (z.shape[-2] * z.shape[-1]))
    return Sigma
82
+
83
+
84
def imstats(X, net1):
    """
    Calculates the image correlation matrices for the perceptual loss.

    Args:
        X (torch.Tensor): Input image tensor.
        net1: Cellpose net.

    Returns:
        list: A list of tensors of correlation matrices, detached so they act
        as fixed targets (no gradients flow into them).
    """
    _, _, Tdown = net1(X)
    return [sigma.detach() for sigma in get_sigma(Tdown)]
99
+
100
+
101
def loss_fn_per(img, net1, yl):
    """
    Calculates the perceptual loss function for image restoration.

    Args:
        img (torch.Tensor): Input image tensor (noisy/blurry/downsampled).
        net1 (torch.nn.Module): Perceptual loss net (Cellpose segmentation net).
        yl: Downsampling-block outputs for the restored image (passed to get_sigma).

    Returns:
        torch.Tensor: Mean perceptual loss.
    """
    Sigma_ref = imstats(img, net1)
    Sigma_test = get_sigma(yl)
    # accumulate, per batch element, each layer's squared correlation error
    # normalized by that layer's variability
    losses = torch.zeros(len(Sigma_ref[0]), device=img.device)
    for ref, test in zip(Sigma_ref, Sigma_test):
        sd = ref.std((1, 2)) + 1e-6
        losses = losses + ((test - ref) ** 2).mean((1, 2)) / sd ** 2
    return losses.mean()
120
+
121
+
122
+ def test_loss(net0, X, net1=None, img=None, lbl=None, lam=[1., 1.5, 0.]):
123
+ """
124
+ Calculates the test loss for image restoration tasks.
125
+
126
+ Args:
127
+ net0 (torch.nn.Module): The image restoration network.
128
+ X (torch.Tensor): The input image tensor.
129
+ net1 (torch.nn.Module, optional): The segmentation network for segmentation or perceptual loss. Defaults to None.
130
+ img (torch.Tensor, optional): Clean image tensor for perceptual or reconstruction loss. Defaults to None.
131
+ lbl (torch.Tensor, optional): The ground truth flows/cellprob tensor for segmentation loss. Defaults to None.
132
+ lam (list, optional): The weights for different loss components (perceptual, segmentation, reconstruction). Defaults to [1., 1.5, 0.].
133
+
134
+ Returns:
135
+ tuple: A tuple containing the total loss and the perceptual loss.
136
+ """
137
+ net0.eval()
138
+ if net1 is not None:
139
+ net1.eval()
140
+ loss, loss_per = torch.zeros(1, device=X.device), torch.zeros(1, device=X.device)
141
+
142
+ with torch.no_grad():
143
+ img_dn = net0(X)[0]
144
+ if lam[2] > 0.:
145
+ loss += lam[2] * loss_fn_rec(img, img_dn)
146
+ if lam[1] > 0. or lam[0] > 0.:
147
+ y, _, ydown = net1(img_dn)
148
+ if lam[1] > 0.:
149
+ loss += lam[1] * loss_fn_seg(lbl, y)
150
+ if lam[0] > 0.:
151
+ loss_per = loss_fn_per(img, net1, ydown)
152
+ loss += lam[0] * loss_per
153
+ return loss, loss_per
154
+
155
+
156
def train_loss(net0, X, net1=None, img=None, lbl=None, lam=(1., 1.5, 0.)):
    """
    Calculates the train loss for image restoration tasks (gradients enabled).

    Args:
        net0 (torch.nn.Module): The image restoration network.
        X (torch.Tensor): The input image tensor.
        net1 (torch.nn.Module, optional): The segmentation network for segmentation or perceptual loss. Defaults to None.
        img (torch.Tensor, optional): Clean image tensor for perceptual or reconstruction loss. Defaults to None.
        lbl (torch.Tensor, optional): The ground truth flows/cellprob tensor for segmentation loss. Defaults to None.
        lam (sequence, optional): The weights for different loss components
            (perceptual, segmentation, reconstruction). Defaults to (1., 1.5, 0.).
            (Default changed from a mutable list to a tuple to avoid the
            shared-mutable-default pitfall; callers passing lists still work.)

    Returns:
        tuple: A tuple containing the total loss and the perceptual loss.
    """
    net0.train()
    if net1 is not None:
        # segmentation net stays in eval mode: it only scores the output
        net1.eval()
    loss, loss_per = torch.zeros(1, device=X.device), torch.zeros(1, device=X.device)

    img_dn = net0(X)[0]  # restored image
    if lam[2] > 0.:
        loss += lam[2] * loss_fn_rec(img, img_dn)
    if lam[1] > 0. or lam[0] > 0.:
        y, _, ydown = net1(img_dn)
        if lam[1] > 0.:
            loss += lam[1] * loss_fn_seg(lbl, y)
        if lam[0] > 0.:
            loss_per = loss_fn_per(img, net1, ydown)
            loss += lam[0] * loss_per
    return loss, loss_per
187
+
188
+
189
def img_norm(imgi):
    """Percentile-normalize an image batch (in place).

    Each channel is shifted by its 1st percentile and scaled by the
    (99th - 1st) percentile range; channels with a near-zero range
    (< 1e-3) are left untouched.

    Args:
        imgi (torch.Tensor): input image tensor of shape (nimg, nchan, Ly, Lx).

    Returns:
        torch.Tensor: normalized image tensor (same storage as input).
    """
    orig_shape = imgi.shape
    flat = imgi.reshape(orig_shape[0], orig_shape[1], -1)
    qs = torch.quantile(flat, torch.tensor([0.01, 0.99], device=imgi.device),
                        dim=-1, keepdim=True)
    low, high = qs[0], qs[1]
    for c in range(flat.shape[1]):
        # only normalize images whose channel range is non-degenerate
        valid = (high[:, c, 0] - low[:, c, 0]) > 1e-3
        flat[valid, c] -= low[valid, c]
        flat[valid, c] /= high[valid, c] - low[valid, c]
    return flat.reshape(orig_shape)
209
+
210
+
211
def add_noise(lbl, alpha=4, beta=0.7, poisson=0.7, blur=0.7, gblur=1.0, downsample=0.7,
              ds_max=7, diams=None, pscale=None, iso=True, sigma0=None, sigma1=None,
              ds=None, uniform_blur=False, partial_blur=False):
    """Adds noise to the input image.

    Args:
        lbl (torch.Tensor): The input image tensor of shape (nimg, nchan, Ly, Lx).
        alpha (float, optional): The shape parameter of the gamma distribution used for generating poisson noise. Defaults to 4.
        beta (float, optional): The rate parameter of the gamma distribution used for generating poisson noise. Defaults to 0.7.
        poisson (float, optional): The probability of adding poisson noise to the image. Defaults to 0.7.
        blur (float, optional): The probability of adding gaussian blur to the image. Defaults to 0.7.
        gblur (float, optional): The scale factor for the gaussian blur. Defaults to 1.0.
        downsample (float, optional): The probability of downsampling the image. Defaults to 0.7.
        ds_max (int, optional): The maximum downsampling factor. Defaults to 7.
        diams (torch.Tensor, optional): The diameter of the objects in the image. Defaults to None.
        pscale (torch.Tensor, optional): The scale factor for the poisson noise, instead of sampling. Defaults to None.
        iso (bool, optional): Whether to use isotropic gaussian blur. Defaults to True.
        sigma0 (torch.Tensor, optional): The standard deviation of the gaussian filter for the Y axis, instead of sampling. Defaults to None.
        sigma1 (torch.Tensor, optional): The standard deviation of the gaussian filter for the X axis, instead of sampling. Defaults to None.
        ds (torch.Tensor, optional): The downsampling factor for each image, instead of sampling. Defaults to None.
        uniform_blur (bool, optional): Sample the blur level uniformly (rather than exponentially). Defaults to False.
        partial_blur (bool, optional): Blend the blurred image into the clean one over part of the X extent. Defaults to False.

    Returns:
        torch.Tensor: The noisy image tensor of the same shape as the input image.
    """
    device = lbl.device
    imgi = torch.zeros_like(lbl)
    Ly, Lx = lbl.shape[-2:]

    # default object diameter of 30 px per image
    diams = diams if diams is not None else 30. * torch.ones(len(lbl), device=device)
    #ds0 = 1 if ds is None else ds.item()
    # broadcast a scalar downsampling factor to one per image
    ds = ds * torch.ones(
        (len(lbl),), device=device, dtype=torch.long) if ds is not None else ds

    # downsample
    # ii holds indices of images that will actually be downsampled
    ii = []
    idownsample = np.random.rand(len(lbl)) < downsample
    if (ds is None and idownsample.sum() > 0.) or not iso:
        ds = torch.ones(len(lbl), dtype=torch.long, device=device)
        ds[idownsample] = torch.randint(2, ds_max + 1, size=(idownsample.sum(),),
                                        device=device)
        ii = torch.nonzero(ds > 1).flatten()
    elif ds is not None and (ds > 1).sum():
        ii = torch.nonzero(ds > 1).flatten()

    # add gaussian blur
    iblur = torch.rand(len(lbl), device=device) < blur
    # downsampled images are always blurred first (anti-aliasing)
    iblur[ii] = True
    if iblur.sum() > 0:
        if sigma0 is None:
            if uniform_blur and iso:
                xr = torch.rand(len(lbl), device=device)
                if len(ii) > 0:
                    # tie blur level to the downsampling factor
                    xr[ii] = ds[ii].float() / 2. / gblur
                sigma0 = diams[iblur] / 30. * gblur * (1 / gblur + (1 - 1 / gblur) * xr[iblur])
                sigma1 = sigma0.clone()
            elif not iso:
                xr = torch.rand(len(lbl), device=device)
                if len(ii) > 0:
                    xr[ii] = (ds[ii].float()) / gblur
                    # jitter the anisotropic blur level
                    xr[ii] = xr[ii] + torch.rand(len(ii), device=device) * 0.7 - 0.35
                    xr[ii] = torch.clip(xr[ii], 0.05, 1.5)
                sigma0 = diams[iblur] / 30. * gblur * xr[iblur]
                # anisotropic: X-axis blur is 10x weaker than Y-axis
                sigma1 = sigma0.clone() / 10.
            else:
                # exponential sampling of blur strength, clipped to [0.1, 1.0]
                xrand = np.random.exponential(1, size=iblur.sum())
                xrand = np.clip(xrand * 0.5, 0.1, 1.0)
                xrand *= gblur
                sigma0 = diams[iblur] / 30. * 5. * torch.from_numpy(xrand).float().to(
                    device)
                sigma1 = sigma0.clone()
        else:
            # user-specified sigmas, broadcast to every blurred image
            sigma0 = sigma0 * torch.ones((iblur.sum(),), device=device)
            sigma1 = sigma1 * torch.ones((iblur.sum(),), device=device)

        # create gaussian filter
        # xr is the filter half-width (at least 8 taps each side)
        xr = max(8, sigma0.max().long() * 2)
        gfilt0 = torch.exp(-torch.arange(-xr + 1, xr, device=device)**2 /
                           (2 * sigma0.unsqueeze(-1)**2))
        gfilt0 /= gfilt0.sum(axis=-1, keepdims=True)
        gfilt1 = torch.zeros_like(gfilt0)
        # reuse the Y-axis filter where sigmas are equal
        gfilt1[sigma1 == sigma0] = gfilt0[sigma1 == sigma0]
        gfilt1[sigma1 != sigma0] = torch.exp(
            -torch.arange(-xr + 1, xr, device=device)**2 /
            (2 * sigma1[sigma1 != sigma0].unsqueeze(-1)**2))
        # sigma1 == 0 means no X-axis blur: delta filter centered at xr
        gfilt1[sigma1 == 0] = 0.
        gfilt1[sigma1 == 0, xr] = 1.
        gfilt1 /= gfilt1.sum(axis=-1, keepdims=True)
        # outer product -> separable 2D gaussian per image
        gfilt = torch.einsum("ck,cl->ckl", gfilt0, gfilt1)
        gfilt /= gfilt.sum(axis=(1, 2), keepdims=True)

        # grouped conv applies each image's own filter to all its channels
        lbl_blur = conv2d(lbl[iblur].transpose(1, 0), gfilt.unsqueeze(1),
                          padding=gfilt.shape[-1] // 2,
                          groups=gfilt.shape[0]).transpose(1, 0)
        if partial_blur:
            #yc, xc = np.random.randint(100, Ly-100), np.random.randint(100, Lx-100)
            imgi[iblur] = lbl[iblur].clone()
            Lxc = int(Lx * 0.85)
            ym, xm = torch.meshgrid(torch.zeros(Ly, dtype=torch.float32),
                                    torch.arange(0, Lxc, dtype=torch.float32),
                                    indexing="ij")
            # NOTE(review): `/ 2*(0.001**2)` parses as `(.../2) * 1e-6`, not
            # `/(2 * 0.001**2)` -- after the min/max rescale below the mask is
            # still a left-to-right gradient, but confirm the intended falloff
            mask = torch.exp(-(ym**2 + xm**2) / 2*(0.001**2))
            mask -= mask.min()
            mask /= mask.max()
            lbl_blur_crop = lbl_blur[:, :, :, :Lxc]
            # blend blurred and clean image across the left 85% of X
            imgi[iblur, :, :, :Lxc] = (lbl_blur_crop * mask +
                                       (1-mask) * imgi[iblur, :, :, :Lxc])
        else:
            imgi[iblur] = lbl_blur

    imgi[~iblur] = lbl[~iblur]

    # apply downsample
    for k in ii:
        # strided slicing then bilinear upsampling back to the original size
        i0 = imgi[k:k + 1, :, ::ds[k], ::ds[k]] if iso else imgi[k:k + 1, :, ::ds[k]]
        imgi[k] = interpolate(i0, size=lbl[k].shape[-2:], mode="bilinear")

    # add poisson noise
    ipoisson = np.random.rand(len(lbl)) < poisson
    if ipoisson.sum() > 0:
        if pscale is None:
            pscale = torch.zeros(len(lbl))
            # sample photon-count scale from a gamma distribution, >= 1
            m = torch.distributions.gamma.Gamma(alpha, beta)
            pscale = torch.clamp(m.rsample(sample_shape=(ipoisson.sum(),)), 1.)
            #pscale = torch.clamp(20 * (torch.rand(size=(len(lbl),), device=lbl.device)), 1.5)
            pscale = pscale.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1).to(device)
        else:
            pscale = pscale * torch.ones((ipoisson.sum(), 1, 1, 1), device=device)
        imgi[ipoisson] = torch.poisson(pscale * imgi[ipoisson])
        # no-op: kept for symmetry with the blur branch above
        imgi[~ipoisson] = imgi[~ipoisson]

    # renormalize
    imgi = img_norm(imgi)

    return imgi
345
+
346
+
347
def random_rotate_and_resize_noise(data, labels=None, diams=None, poisson=0.7, blur=0.7,
                                   downsample=0.0, beta=0.7, gblur=1.0, diam_mean=30,
                                   ds_max=7, uniform_blur=False, iso=True, rotate=True,
                                   device=torch.device("cuda"), xy=(224, 224),
                                   nchan_noise=1, keep_raw=True):
    """
    Applies random rotation, resizing, and noise to the input data.

    Args:
        data (numpy.ndarray): The input data.
        labels (numpy.ndarray, optional): The flow and cellprob labels associated with the data. Defaults to None.
        diams (float, optional): The diameter of the objects. Defaults to None.
        poisson (float, optional): The Poisson noise probability. Defaults to 0.7.
        blur (float, optional): The blur probability. Defaults to 0.7.
        downsample (float, optional): The downsample probability. Defaults to 0.0.
        beta (float, optional): The beta value for the poisson noise distribution. Defaults to 0.7.
        gblur (float, optional): The Gaussian blur level. Defaults to 1.0.
        diam_mean (float, optional): The mean diameter. Defaults to 30.
        ds_max (int, optional): The maximum downsample value. Defaults to 7.
        uniform_blur (bool, optional): Sample blur level uniformly. Defaults to False.
        iso (bool, optional): Whether to apply isotropic augmentation. Defaults to True.
        rotate (bool, optional): Whether to apply rotation augmentation. Defaults to True.
        device (torch.device, optional): The device to use. Defaults to torch.device("cuda").
        xy (tuple, optional): The size of the output image. Defaults to (224, 224).
        nchan_noise (int, optional): The number of channels to add noise to. Defaults to 1.
        keep_raw (bool, optional): Whether to keep the raw image. Defaults to True.

    Returns:
        torch.Tensor: The augmented image and augmented noisy/blurry/downsampled version of image.
        torch.Tensor: The augmented labels.
        float: The scale factor applied to the image.
    """
    # NOTE(review): `== None` should be `is None` per PEP 8; behavior is the
    # same for torch.device but the identity test is the safe idiom
    if device == None:
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('mps') if torch.backends.mps.is_available() else None

    diams = 30 if diams is None else diams
    # random per-image target diameter in [diam_mean/2, diam_mean*2]
    random_diam = diam_mean * (2**(2 * np.random.rand(len(data)) - 1))
    random_rsc = diams / random_diam  #/ random_diam
    #rsc /= random_scale
    # intermediate canvas size before the final crop/rotate to `xy`
    xy0 = (340, 340)
    nchan = data[0].shape[0]
    # when keep_raw, channels are [noisy..., raw...] stacked along axis 1
    data_new = np.zeros((len(data), (1 + keep_raw) * nchan, xy0[0], xy0[1]), "float32")
    labels_new = np.zeros((len(data), 3, xy0[0], xy0[1]), "float32")
    for i in range(
            len(data)):  #, (sc, img, lbl) in enumerate(zip(random_rsc, data, labels)):
        sc = random_rsc[i]
        img = data[i]
        lbl = labels[i] if labels is not None else None
        # create affine transform to resize
        Ly, Lx = img.shape[-2:]
        # random translation within the slack left after rescaling
        dxy = np.maximum(0, np.array([Lx / sc - xy0[1], Ly / sc - xy0[0]]))
        dxy = (np.random.rand(2,) - .5) * dxy
        cc = np.array([Lx / 2, Ly / 2])
        cc1 = cc - np.array([Lx - xy0[1], Ly - xy0[0]]) / 2 + dxy
        # three point correspondences define the scale+translate affine map
        pts1 = np.float32([cc, cc + np.array([1, 0]), cc + np.array([0, 1])])
        pts2 = np.float32(
            [cc1, cc1 + np.array([1, 0]) / sc, cc1 + np.array([0, 1]) / sc])
        M = cv2.getAffineTransform(pts1, pts2)

        # apply to image
        for c in range(nchan):
            img_rsz = cv2.warpAffine(img[c], M, xy0, flags=cv2.INTER_LINEAR)
            #img_noise = add_noise(torch.from_numpy(img_rsz).to(device).unsqueeze(0)).cpu().numpy().squeeze(0)
            data_new[i, c] = img_rsz
            if keep_raw:
                # duplicate: the second copy stays clean (noise added below
                # only touches the first nchan_noise channels)
                data_new[i, c + nchan] = img_rsz

        if lbl is not None:
            # apply to labels
            # channel 0 is the cellprob/mask (nearest), 1-2 are flows (linear)
            labels_new[i, 0] = cv2.warpAffine(lbl[0], M, xy0, flags=cv2.INTER_NEAREST)
            labels_new[i, 1] = cv2.warpAffine(lbl[1], M, xy0, flags=cv2.INTER_LINEAR)
            labels_new[i, 2] = cv2.warpAffine(lbl[2], M, xy0, flags=cv2.INTER_LINEAR)

    rsc = random_diam / diam_mean

    # add noise before augmentations
    img = torch.from_numpy(data_new).to(device)
    img = torch.clamp(img, 0.)
    # just add noise to cyto if nchan_noise=1
    img[:, :nchan_noise] = add_noise(
        img[:, :nchan_noise], poisson=poisson, blur=blur, ds_max=ds_max, iso=iso,
        downsample=downsample, beta=beta, gblur=gblur,
        diams=torch.from_numpy(random_diam).to(device).float())
    # img -= img.mean(dim=(-2,-1), keepdim=True)
    # img /= img.std(dim=(-2,-1), keepdim=True) + 1e-3
    img = img.cpu().numpy()

    # augmentations
    img, lbl, scale = transforms.random_rotate_and_resize(
        img,
        Y=labels_new,
        xy=xy,
        rotate=False if not iso else rotate,
        #(iso and downsample==0),
        rescale=rsc,
        scale_range=0.5)
    img = torch.from_numpy(img).to(device)
    lbl = torch.from_numpy(lbl).to(device)

    return img, lbl, scale
446
+
447
+
448
def one_chan_cellpose(device, model_type="cyto2", pretrained_model=None):
    """
    Creates a Cellpose network with a single input channel.

    The pretrained weights (trained with 2 input channels) are copied over;
    for the first downsampling layer ("res_down_0") the 2-channel weights are
    collapsed onto channel 0. The diam_mean/diam_labels buffers keep their
    freshly-initialized values.

    Args:
        device (str): The device to run the network on.
        model_type (str, optional): The type of Cellpose model to use. Defaults to "cyto2".
        pretrained_model (str, optional): The path to a pretrained model file. Defaults to None.

    Returns:
        torch.nn.Module: The Cellpose network with a single input channel.
    """
    if pretrained_model is not None and not os.path.exists(pretrained_model):
        # a built-in model name was passed in the pretrained_model slot
        model_type, pretrained_model = pretrained_model, None

    net1 = resnet_torch.CPnet([1, 32, 64, 128, 256], nout=3, sz=3).to(device)
    filename = pretrained_model if pretrained_model is not None else model_path(
        model_type, 0)
    weights = torch.load(filename, weights_only=True)
    print(filename)

    state = net1.state_dict()
    for name in state:
        first_layer = ("res_down_0.conv.conv_0" in name or
                       "res_down_0.proj" in name)
        if (not first_layer and name != "diam_mean" and name != "diam_labels"):
            # shapes match everywhere past the first layer: copy verbatim
            state[name].copy_(weights[name])
        elif "res_down_0" in name:
            w = weights[name]
            if len(w.shape) > 0:
                remapped = torch.zeros_like(state[name])
                if w.shape[0] == 2:
                    # e.g. per-channel bias: keep channel 0 only
                    remapped[:] = w[0]
                elif len(w.shape) > 1 and w.shape[1] == 2:
                    # input-channel dim: place channel 0 weights at index 0
                    remapped[:, 0] = w[:, 0]
                else:
                    remapped = w
            else:
                remapped = w
            state[name].copy_(remapped)
    return net1
490
+
491
+
492
class CellposeDenoiseModel():
    """Model to run Cellpose image restoration followed by segmentation.

    Wraps a :class:`DenoiseModel` (restoration) and a :class:`CellposeModel`
    (segmentation); ``eval`` runs them in sequence.
    """

    def __init__(self, gpu=False, pretrained_model=False, model_type=None,
                 restore_type="denoise_cyto3", nchan=2,
                 chan2_restore=False, device=None):

        # restoration network (denoise / deblur / upsample)
        self.dn = DenoiseModel(gpu=gpu, model_type=restore_type, chan2=chan2_restore,
                               device=device)
        # segmentation network, run on the restored images
        self.cp = CellposeModel(gpu=gpu, model_type=model_type, nchan=nchan,
                                pretrained_model=pretrained_model, device=device)

    def eval(self, x, batch_size=8, channels=None, channel_axis=None, z_axis=None,
             normalize=True, rescale=None, diameter=None, tile_overlap=0.1,
             augment=False, resample=True, invert=False, flow_threshold=0.4,
             cellprob_threshold=0.0, do_3D=False, anisotropy=None, stitch_threshold=0.0,
             min_size=15, niter=None, interp=True, bsize=224, flow3D_smooth=0):
        """
        Restore array or list of images using the image restoration model, and then segment.

        Args:
            x (list, np.ndarray): list or array of 2D/3D/4D images.
            batch_size (int, optional): number of 224x224 patches to run simultaneously on the GPU. Defaults to 8.
            channels (list, optional): [chan_to_segment, optional_nuclear_chan] with
                0=grayscale, 1=red, 2=green, 3=blue; may be per-image (list of pairs). Defaults to None.
            channel_axis (int, optional): channel axis in x; auto-detected if None. Defaults to None.
            z_axis (int, optional): z axis in x; auto-detected if None. Defaults to None.
            normalize (bool or dict, optional): if True, percentile-normalize each channel
                (1st/99th); a dict may override keys of ``normalize_default``
                ("lowhigh", "sharpen", "normalize", "percentile", "tile_norm", "norm3D"). Defaults to True.
            rescale (float, optional): resize factor per image; only used if diameter is None. Defaults to None.
            diameter (float, optional): diameter per image; if None, uses diam_mean/diam_train. Defaults to None.
            tile_overlap (float, optional): fraction of tile overlap when computing flows. Defaults to 0.1.
            augment (bool, optional): flip-and-average tiles during segmentation. Defaults to False.
            resample (bool, optional): run dynamics at original image size. Defaults to True.
            invert (bool, optional): invert pixel intensities before the network. Defaults to False.
            flow_threshold (float, optional): flow error threshold for keeping cells (2D only). Defaults to 0.4.
            cellprob_threshold (float, optional): pixel threshold for mask seeds. Defaults to 0.0.
            do_3D (bool, optional): run 3D segmentation on 3D/4D input. Defaults to False.
            anisotropy (float, optional): 3D z-rescaling factor. Defaults to None.
            stitch_threshold (float, optional): if >0 and not do_3D, stitch 2D masks into 3D. Defaults to 0.0.
            min_size (int, optional): discard ROIs below this pixel count. Defaults to 15.
            niter (int, optional): dynamics iterations; if None, proportional to diameter. Defaults to None.
            interp (bool, optional): interpolate during 2D dynamics. Defaults to True.
            bsize (int, optional): tile size in pixels. Defaults to 224.
            flow3D_smooth (int, optional): gaussian stddev for smoothing 3D flows. Defaults to 0.

        Returns:
            tuple: (masks, flows, styles, imgs) — labelled masks (0=background),
            flow visualizations/values/cellprob/pixel trajectories, per-image
            style vectors (size 256), and the restored images.
        """

        if isinstance(normalize, dict):
            normalize_params = {**normalize_default, **normalize}
        elif not isinstance(normalize, bool):
            raise ValueError("normalize parameter must be a bool or a dict")
        else:
            # NOTE(review): this aliases the module-level `normalize_default`
            # dict, so the two assignments below mutate the shared default —
            # confirm this cross-call leakage is intended
            normalize_params = normalize_default
            normalize_params["normalize"] = normalize
            normalize_params["invert"] = invert

        img_restore = self.dn.eval(x, batch_size=batch_size, channels=channels,
                                   channel_axis=channel_axis, z_axis=z_axis,
                                   do_3D=do_3D,
                                   normalize=normalize_params, rescale=rescale,
                                   diameter=diameter,
                                   tile_overlap=tile_overlap, bsize=bsize)

        # turn off special normalization for segmentation
        normalize_params = normalize_default

        # change channels for segmentation
        if channels is not None:
            channels_new = [0, 0] if channels[0] == 0 else [1, 2]
        else:
            channels_new = None
        # change diameter if self.ratio > 1 (upsampled to self.dn.diam_mean)
        diameter = self.dn.diam_mean if self.dn.ratio > 1 else diameter
        masks, flows, styles = self.cp.eval(
            img_restore, batch_size=batch_size, channels=channels_new, channel_axis=-1,
            z_axis=0 if not isinstance(img_restore, list) and img_restore.ndim > 3 and img_restore.shape[0] > 0 else None,
            normalize=normalize_params, rescale=rescale, diameter=diameter,
            tile_overlap=tile_overlap, augment=augment, resample=resample,
            invert=invert, flow_threshold=flow_threshold,
            cellprob_threshold=cellprob_threshold, do_3D=do_3D, anisotropy=anisotropy,
            stitch_threshold=stitch_threshold, min_size=min_size, niter=niter,
            interp=interp, bsize=bsize)

        return masks, flows, styles, img_restore
598
+
599
+
600
+ class DenoiseModel():
601
+ """
602
+ DenoiseModel class for denoising images using Cellpose denoising model.
603
+
604
+ Args:
605
+ gpu (bool, optional): Whether to use GPU for computation. Defaults to False.
606
+ pretrained_model (bool or str or Path, optional): Pretrained model to use for denoising.
607
+ Can be a string or path. Defaults to False.
608
+ nchan (int, optional): Number of channels in the input images, all Cellpose 3 models were trained with nchan=1. Defaults to 1.
609
+ model_type (str, optional): Type of pretrained model to use ("denoise_cyto3", "deblur_cyto3", "upsample_cyto3", ...). Defaults to None.
610
+ chan2 (bool, optional): Whether to use a separate model for the second channel. Defaults to False.
611
+ diam_mean (float, optional): Mean diameter of the objects in the images. Defaults to 30.0.
612
+ device (torch.device, optional): Device to use for computation. Defaults to None.
613
+
614
+ Attributes:
615
+ nchan (int): Number of channels in the input images.
616
+ diam_mean (float): Mean diameter of the objects in the images.
617
+ net (CPnet): Cellpose network for denoising.
618
+ pretrained_model (bool or str or Path): Pretrained model path to use for denoising.
619
+ net_chan2 (CPnet or None): Cellpose network for the second channel, if applicable.
620
+ net_type (str): Type of the denoising network.
621
+
622
+ Methods:
623
+ eval(x, batch_size=8, channels=None, channel_axis=None, z_axis=None,
624
+ normalize=True, rescale=None, diameter=None, tile=True, tile_overlap=0.1)
625
+ Denoise array or list of images using the denoising model.
626
+
627
+ _eval(net, x, normalize=True, rescale=None, diameter=None, tile=True,
628
+ tile_overlap=0.1)
629
+ Run denoising model on a single channel.
630
+ """
631
+
632
+ def __init__(self, gpu=False, pretrained_model=False, nchan=1, model_type=None,
633
+ chan2=False, diam_mean=30., device=None):
634
+ self.nchan = nchan
635
+ if pretrained_model and (not isinstance(pretrained_model, str) and
636
+ not isinstance(pretrained_model, Path)):
637
+ raise ValueError("pretrained_model must be a string or path")
638
+
639
+ self.diam_mean = diam_mean
640
+ builtin = True
641
+ if model_type is not None or (pretrained_model and
642
+ not os.path.exists(pretrained_model)):
643
+ pretrained_model_string = model_type if model_type is not None else "denoise_cyto3"
644
+ if ~np.any([pretrained_model_string == s for s in MODEL_NAMES]):
645
+ pretrained_model_string = "denoise_cyto3"
646
+ pretrained_model = model_path(pretrained_model_string)
647
+ if (pretrained_model and not os.path.exists(pretrained_model)):
648
+ denoise_logger.warning("pretrained model has incorrect path")
649
+ denoise_logger.info(f">> {pretrained_model_string} << model set to be used")
650
+ self.diam_mean = 17. if "nuclei" in pretrained_model_string else 30.
651
+ else:
652
+ if pretrained_model:
653
+ builtin = False
654
+ pretrained_model_string = pretrained_model
655
+ denoise_logger.info(f">>>> loading model {pretrained_model_string}")
656
+
657
+ # assign network device
658
+ if device is None:
659
+ sdevice, gpu = assign_device(use_torch=True, gpu=gpu)
660
+ self.device = device if device is not None else sdevice
661
+ if device is not None:
662
+ device_gpu = self.device.type == "cuda"
663
+ self.gpu = gpu if device is None else device_gpu
664
+
665
+ # create network
666
+ self.nchan = nchan
667
+ self.nclasses = 1
668
+ nbase = [32, 64, 128, 256]
669
+ self.nchan = nchan
670
+ self.nbase = [nchan, *nbase]
671
+
672
+ self.net = CPnet(self.nbase, self.nclasses, sz=3,
673
+ max_pool=True, diam_mean=diam_mean).to(self.device)
674
+
675
+ self.pretrained_model = pretrained_model
676
+ self.net_chan2 = None
677
+ if self.pretrained_model:
678
+ self.net.load_model(self.pretrained_model, device=self.device)
679
+ denoise_logger.info(
680
+ f">>>> model diam_mean = {self.diam_mean: .3f} (ROIs rescaled to this size during training)"
681
+ )
682
+ if chan2 and builtin:
683
+ chan2_path = model_path(
684
+ os.path.split(self.pretrained_model)[-1].split("_")[0] + "_nuclei")
685
+ print(f"loading model for chan2: {os.path.split(str(chan2_path))[-1]}")
686
+ self.net_chan2 = CPnet(self.nbase, self.nclasses, sz=3,
687
+ max_pool=True,
688
+ diam_mean=17.).to(self.device)
689
+ self.net_chan2.load_model(chan2_path, device=self.device)
690
+ self.net_type = "cellpose_denoise"
691
+
692
+ def eval(self, x, batch_size=8, channels=None, channel_axis=None, z_axis=None,
693
+ normalize=True, rescale=None, diameter=None, tile=True, do_3D=False,
694
+ tile_overlap=0.1, bsize=224):
695
+ """
696
+ Restore array or list of images using the image restoration model.
697
+
698
+ Args:
699
+ x (list, np.ndarry): can be list of 2D/3D/4D images, or array of 2D/3D/4D images
700
+ batch_size (int, optional): number of 224x224 patches to run simultaneously on the GPU
701
+ (can make smaller or bigger depending on GPU memory usage). Defaults to 8.
702
+ channels (list, optional): list of channels, either of length 2 or of length number of images by 2.
703
+ First element of list is the channel to segment (0=grayscale, 1=red, 2=green, 3=blue).
704
+ Second element of list is the optional nuclear channel (0=none, 1=red, 2=green, 3=blue).
705
+ For instance, to segment grayscale images, input [0,0]. To segment images with cells
706
+ in green and nuclei in blue, input [2,3]. To segment one grayscale image and one
707
+ image with cells in green and nuclei in blue, input [[0,0], [2,3]].
708
+ Defaults to None.
709
+ channel_axis (int, optional): channel axis in element of list x, or of np.ndarray x.
710
+ if None, channels dimension is attempted to be automatically determined. Defaults to None.
711
+ z_axis (int, optional): z axis in element of list x, or of np.ndarray x.
712
+ if None, z dimension is attempted to be automatically determined. Defaults to None.
713
+ normalize (bool, optional): if True, normalize data so 0.0=1st percentile and 1.0=99th percentile of image intensities in each channel;
714
+ can also pass dictionary of parameters (all keys are optional, default values shown):
715
+ - "lowhigh"=None : pass in normalization values for 0.0 and 1.0 as list [low, high] (if not None, all following parameters ignored)
716
+ - "sharpen"=0 ; sharpen image with high pass filter, recommended to be 1/4-1/8 diameter of cells in pixels
717
+ - "normalize"=True ; run normalization (if False, all following parameters ignored)
718
+ - "percentile"=None : pass in percentiles to use as list [perc_low, perc_high]
719
+ - "tile_norm"=0 ; compute normalization in tiles across image to brighten dark areas, to turn on set to window size in pixels (e.g. 100)
720
+ - "norm3D"=False ; compute normalization across entire z-stack rather than plane-by-plane in stitching mode.
721
+ Defaults to True.
722
+ rescale (float, optional): resize factor for each image, if None, set to 1.0;
723
+ (only used if diameter is None). Defaults to None.
724
+ diameter (float, optional): diameter for each image,
725
+ if diameter is None, set to diam_mean or diam_train if available. Defaults to None.
726
+ tile_overlap (float, optional): fraction of overlap of tiles when computing flows. Defaults to 0.1.
727
+
728
+ Returns:
729
+ list: A list of 2D/3D arrays of restored images
730
+
731
+ """
732
+ if isinstance(x, list) or x.squeeze().ndim == 5:
733
+ tqdm_out = utils.TqdmToLogger(denoise_logger, level=logging.INFO)
734
+ nimg = len(x)
735
+ iterator = trange(nimg, file=tqdm_out,
736
+ mininterval=30) if nimg > 1 else range(nimg)
737
+ imgs = []
738
+ for i in iterator:
739
+ imgi = self.eval(
740
+ x[i], batch_size=batch_size,
741
+ channels=channels[i] if channels is not None and
742
+ ((len(channels) == len(x) and
743
+ (isinstance(channels[i], list) or
744
+ isinstance(channels[i], np.ndarray)) and len(channels[i]) == 2))
745
+ else channels, channel_axis=channel_axis, z_axis=z_axis,
746
+ normalize=normalize,
747
+ do_3D=do_3D,
748
+ rescale=rescale[i] if isinstance(rescale, list) or
749
+ isinstance(rescale, np.ndarray) else rescale,
750
+ diameter=diameter[i] if isinstance(diameter, list) or
751
+ isinstance(diameter, np.ndarray) else diameter,
752
+ tile_overlap=tile_overlap, bsize=bsize)
753
+ imgs.append(imgi)
754
+ if isinstance(x, np.ndarray):
755
+ imgs = np.array(imgs)
756
+ return imgs
757
+
758
+ else:
759
+ # reshape image
760
+ x = transforms.convert_image(x, channels, channel_axis=channel_axis,
761
+ z_axis=z_axis, do_3D=do_3D, nchan=None)
762
+ if x.ndim < 4:
763
+ squeeze = True
764
+ x = x[np.newaxis, ...]
765
+ else:
766
+ squeeze = False
767
+
768
+ # may need to interpolate image before running upsampling
769
+ self.ratio = 1.
770
+ if "upsample" in self.pretrained_model:
771
+ Ly, Lx = x.shape[-3:-1]
772
+ if diameter is not None and 3 <= diameter < self.diam_mean:
773
+ self.ratio = self.diam_mean / diameter
774
+ denoise_logger.info(
775
+ f"upsampling image to {self.diam_mean} pixel diameter ({self.ratio:0.2f} times)"
776
+ )
777
+ Lyr, Lxr = int(Ly * self.ratio), int(Lx * self.ratio)
778
+ x = transforms.resize_image(x, Ly=Lyr, Lx=Lxr)
779
+ else:
780
+ denoise_logger.warning(
781
+ f"not interpolating image before upsampling because diameter is set >= {self.diam_mean}"
782
+ )
783
+ #raise ValueError(f"diameter is set to {diameter}, needs to be >=3 and < {self.dn.diam_mean}")
784
+
785
+ self.batch_size = batch_size
786
+
787
+ if diameter is not None and diameter > 0:
788
+ rescale = self.diam_mean / diameter
789
+ elif rescale is None:
790
+ rescale = 1.0
791
+
792
+ if np.ptp(x[..., -1]) < 1e-3 or (channels is not None and channels[-1] == 0):
793
+ x = x[..., :1]
794
+
795
+ for c in range(x.shape[-1]):
796
+ rescale0 = rescale * 30. / 17. if c == 1 else rescale
797
+ if c == 0 or self.net_chan2 is None:
798
+ x[...,
799
+ c] = self._eval(self.net, x[..., c:c + 1], batch_size=batch_size,
800
+ normalize=normalize, rescale=rescale0,
801
+ tile_overlap=tile_overlap, bsize=bsize)[...,0]
802
+ else:
803
+ x[...,
804
+ c] = self._eval(self.net_chan2, x[...,
805
+ c:c + 1], batch_size=batch_size,
806
+ normalize=normalize, rescale=rescale0,
807
+ tile_overlap=tile_overlap, bsize=bsize)[...,0]
808
+ x = x[0] if squeeze else x
809
+ return x
810
+
811
+ def _eval(self, net, x, batch_size=8, normalize=True, rescale=None,
812
+ tile_overlap=0.1, bsize=224):
813
+ """
814
+ Run image restoration model on a single channel.
815
+
816
+ Args:
817
+ x (list, np.ndarry): can be list of 2D/3D/4D images, or array of 2D/3D/4D images
818
+ batch_size (int, optional): number of 224x224 patches to run simultaneously on the GPU
819
+ (can make smaller or bigger depending on GPU memory usage). Defaults to 8.
820
+ normalize (bool, optional): if True, normalize data so 0.0=1st percentile and 1.0=99th percentile of image intensities in each channel;
821
+ can also pass dictionary of parameters (all keys are optional, default values shown):
822
+ - "lowhigh"=None : pass in normalization values for 0.0 and 1.0 as list [low, high] (if not None, all following parameters ignored)
823
+ - "sharpen"=0 ; sharpen image with high pass filter, recommended to be 1/4-1/8 diameter of cells in pixels
824
+ - "normalize"=True ; run normalization (if False, all following parameters ignored)
825
+ - "percentile"=None : pass in percentiles to use as list [perc_low, perc_high]
826
+ - "tile_norm"=0 ; compute normalization in tiles across image to brighten dark areas, to turn on set to window size in pixels (e.g. 100)
827
+ - "norm3D"=False ; compute normalization across entire z-stack rather than plane-by-plane in stitching mode.
828
+ Defaults to True.
829
+ rescale (float, optional): resize factor for each image, if None, set to 1.0;
830
+ (only used if diameter is None). Defaults to None.
831
+ tile_overlap (float, optional): fraction of overlap of tiles when computing flows. Defaults to 0.1.
832
+
833
+ Returns:
834
+ list: A list of 2D/3D arrays of restored images
835
+
836
+ """
837
+ if isinstance(normalize, dict):
838
+ normalize_params = {**normalize_default, **normalize}
839
+ elif not isinstance(normalize, bool):
840
+ raise ValueError("normalize parameter must be a bool or a dict")
841
+ else:
842
+ normalize_params = normalize_default
843
+ normalize_params["normalize"] = normalize
844
+
845
+ tic = time.time()
846
+ shape = x.shape
847
+ nimg = shape[0]
848
+
849
+ do_normalization = True if normalize_params["normalize"] else False
850
+
851
+ img = np.asarray(x)
852
+ if do_normalization:
853
+ img = transforms.normalize_img(img, **normalize_params)
854
+ if rescale != 1.0:
855
+ img = transforms.resize_image(img, rsz=rescale)
856
+ yf, style = run_net(self.net, img, bsize=bsize,
857
+ tile_overlap=tile_overlap)
858
+ yf = transforms.resize_image(yf, shape[1], shape[2])
859
+ imgs = yf
860
+ del yf, style
861
+
862
+ # imgs = np.zeros((*x.shape[:-1], 1), np.float32)
863
+ # for i in iterator:
864
+ # img = np.asarray(x[i])
865
+ # if do_normalization:
866
+ # img = transforms.normalize_img(img, **normalize_params)
867
+ # if rescale != 1.0:
868
+ # img = transforms.resize_image(img, rsz=[rescale, rescale])
869
+ # if img.ndim == 2:
870
+ # img = img[:, :, np.newaxis]
871
+ # yf, style = run_net(net, img, batch_size=batch_size, augment=False,
872
+ # tile=tile, tile_overlap=tile_overlap, bsize=bsize)
873
+ # img = transforms.resize_image(yf, Ly=x.shape[-3], Lx=x.shape[-2])
874
+
875
+ # if img.ndim == 2:
876
+ # img = img[:, :, np.newaxis]
877
+ # imgs[i] = img
878
+ # del yf, style
879
+ net_time = time.time() - tic
880
+ if nimg > 1:
881
+ denoise_logger.info("imgs denoised in %2.2fs" % (net_time))
882
+
883
+ return imgs
884
+
885
+
886
def train(net, train_data=None, train_labels=None, train_files=None, test_data=None,
          test_labels=None, test_files=None, train_probs=None, test_probs=None,
          lam=[1., 1.5, 0.], scale_range=0.5, seg_model_type="cyto2", save_path=None,
          save_every=100, save_each=False, poisson=0.7, beta=0.7, blur=0.7, gblur=1.0,
          iso=True, uniform_blur=False, downsample=0., ds_max=7,
          learning_rate=0.005, n_epochs=500,
          weight_decay=0.00001, batch_size=8, nimg_per_epoch=None,
          nimg_test_per_epoch=None, model_name=None):
    """Train a denoising network with perceptual / segmentation / reconstruction losses.

    Either in-memory data (train_data + train_labels) or file paths
    (train_files, with flow files expected at "<name>_flows.tif") must be
    provided; the same convention applies to the optional test set.

    Args:
        net: denoising network to train (provides .device, .nchan, .diam_mean,
            .parameters() and .save_model()).
        train_data / train_labels (list, optional): pre-loaded normalized
            images and their label/flow arrays.
        train_files / test_files (list, optional): image paths used when the
            in-memory data is None.
        train_probs / test_probs (np.ndarray, optional): per-image sampling
            probabilities; uniform if None.
        lam (list): weights for [perceptual, segmentation, reconstruction]
            losses (read-only here, so the mutable default is safe).
        scale_range (float): accepted for interface compatibility; not used in
            this function body.
        seg_model_type (str): pretrained model used for the segmentation loss.
        save_path / save_every / save_each / model_name: checkpointing options.
        poisson, beta, blur, gblur, downsample (float or list): noise/corruption
            parameters; lists define multiple noise regimes applied per batch.
        iso (bool): isotropic augmentation; uniform_blur (bool): uniform PSF.
        ds_max (int): max downsampling factor for augmentation.
        learning_rate, n_epochs, weight_decay, batch_size: optimization options.
        nimg_per_epoch / nimg_test_per_epoch (int, optional): images sampled per
            epoch; defaults to dataset size.

    Returns:
        tuple: (filename, train_losses, test_losses) -- checkpoint path (or
        None when save_path is None) and per-logging-interval loss histories.
    """
    # net properties
    device = net.device
    nchan = net.nchan
    diam_mean = net.diam_mean.item()

    # promote scalar noise parameters to 1-element arrays so that several
    # noise regimes (lists) and a single regime share the same code path
    args = np.array([poisson, beta, blur, gblur, downsample])
    if args.ndim == 1:
        args = args[:, np.newaxis]
    poisson, beta, blur, gblur, downsample = args
    nnoise = len(poisson)

    d = datetime.datetime.now()
    if save_path is not None:
        if model_name is None:
            # build a descriptive checkpoint name from loss weights + noise setup
            filename = ""
            lstrs = ["per", "seg", "rec"]
            for k, (l, s) in enumerate(zip(lam, lstrs)):
                filename += f"{s}_{l:.2f}_"
            if not iso:
                filename += "aniso_"
            if poisson.sum() > 0:
                filename += "poisson_"
            if blur.sum() > 0:
                filename += "blur_"
            if downsample.sum() > 0:
                filename += "downsample_"
            filename += d.strftime("%Y_%m_%d_%H_%M_%S.%f")
            filename = os.path.join(save_path, filename)
        else:
            filename = os.path.join(save_path, model_name)
        print(filename)
    for i in range(len(poisson)):
        denoise_logger.info(
            f"poisson: {poisson[i]: 0.2f}, beta: {beta[i]: 0.2f}, blur: {blur[i]: 0.2f}, gblur: {gblur[i]: 0.2f}, downsample: {downsample[i]: 0.2f}"
        )
    # frozen single-channel segmentation net used for the seg/perceptual losses
    net1 = one_chan_cellpose(device=device, pretrained_model=seg_model_type)

    # LR schedule: linear warmup over 10 epochs, constant, then halved every
    # 10 epochs over the last 100 (max(0, ...) guards small n_epochs, which
    # previously raised in np.ones with a negative size)
    learning_rate_const = learning_rate
    LR = np.linspace(0, learning_rate_const, 10)
    LR = np.append(LR, learning_rate_const * np.ones(max(0, n_epochs - 100)))
    for i in range(10):
        LR = np.append(LR, LR[-1] / 2 * np.ones(10))
    learning_rate = LR

    # NOTE: batch_size now honors the caller's argument (a hard-coded
    # `batch_size = 8` previously overrode it silently)
    optimizer = torch.optim.AdamW(net.parameters(), lr=learning_rate[0],
                                  weight_decay=weight_decay)
    if train_data is not None:
        nimg = len(train_data)
        diam_train = np.array(
            [utils.diameters(train_labels[k])[0] for k in trange(len(train_labels))])
        diam_train[diam_train < 5] = 5.
        if test_data is not None:
            diam_test = np.array(
                [utils.diameters(test_labels[k])[0] for k in trange(len(test_labels))])
            diam_test[diam_test < 5] = 5.
            nimg_test = len(test_data)
    else:
        nimg = len(train_files)
        denoise_logger.info(">>> using files instead of loading dataset")
        train_labels_files = [str(tf)[:-4] + f"_flows.tif" for tf in train_files]
        denoise_logger.info(">>> computing diameters")
        diam_train = np.array([
            utils.diameters(io.imread(train_labels_files[k])[0])[0]
            for k in trange(len(train_labels_files))
        ])
        diam_train[diam_train < 5] = 5.
        if test_files is not None:
            nimg_test = len(test_files)
            test_labels_files = [str(tf)[:-4] + f"_flows.tif" for tf in test_files]
            diam_test = np.array([
                utils.diameters(io.imread(test_labels_files[k])[0])[0]
                for k in trange(len(test_labels_files))
            ])
            diam_test[diam_test < 5] = 5.
    train_probs = 1. / nimg * np.ones(nimg,
                                      "float64") if train_probs is None else train_probs
    if test_files is not None or test_data is not None:
        test_probs = 1. / nimg_test * np.ones(
            nimg_test, "float64") if test_probs is None else test_probs

    tic = time.time()

    nimg_per_epoch = nimg if nimg_per_epoch is None else nimg_per_epoch
    if test_files is not None or test_data is not None:
        nimg_test_per_epoch = nimg_test if nimg_test_per_epoch is None else nimg_test_per_epoch

    nbatch = 0
    train_losses, test_losses = [], []
    for iepoch in range(n_epochs):
        # seed per epoch so sampling is reproducible
        np.random.seed(iepoch)
        rperm = np.random.choice(np.arange(0, nimg), size=(nimg_per_epoch,),
                                 p=train_probs)
        torch.manual_seed(iepoch)
        np.random.seed(iepoch)
        for param_group in optimizer.param_groups:
            param_group["lr"] = learning_rate[iepoch]
        lavg, lavg_per, nsum = 0, 0, 0
        for ibatch in range(0, nimg_per_epoch, batch_size * nnoise):
            inds = rperm[ibatch : ibatch + batch_size * nnoise]
            if train_data is None:
                imgs = [np.maximum(0, io.imread(train_files[i])[:nchan]) for i in inds]
                lbls = [io.imread(train_labels_files[i])[1:] for i in inds]
            else:
                imgs = [train_data[i][:nchan] for i in inds]
                lbls = [train_labels[i][1:] for i in inds]
            # apply each noise regime to a different sub-batch, in random order
            rnoise = np.random.permutation(nnoise)
            for i, inoise in enumerate(rnoise):
                if i * batch_size < len(imgs):
                    imgi, lbli, scale = random_rotate_and_resize_noise(
                        imgs[i * batch_size : (i + 1) * batch_size],
                        lbls[i * batch_size : (i + 1) * batch_size],
                        diam_train[inds][i * batch_size : (i + 1) * batch_size].copy(),
                        poisson=poisson[inoise],
                        beta=beta[inoise], gblur=gblur[inoise], blur=blur[inoise], iso=iso,
                        downsample=downsample[inoise], uniform_blur=uniform_blur,
                        diam_mean=diam_mean, ds_max=ds_max,
                        device=device)
                    if i == 0:
                        img = imgi
                        lbl = lbli
                    else:
                        img = torch.cat((img, imgi), axis=0)
                        lbl = torch.cat((lbl, lbli), axis=0)

            # shuffle so each gradient step mixes noise regimes
            if nnoise > 0:
                iperm = np.random.permutation(img.shape[0])
                img, lbl = img[iperm], lbl[iperm]

            for i in range(nnoise):
                optimizer.zero_grad()
                imgi = img[i * batch_size: (i + 1) * batch_size]
                lbli = lbl[i * batch_size: (i + 1) * batch_size]
                if imgi.shape[0] > 0:
                    # first nchan channels = noisy input, remaining = clean target
                    loss, loss_per = train_loss(net, imgi[:, :nchan], net1=net1,
                                                img=imgi[:, nchan:], lbl=lbli, lam=lam)
                    loss.backward()
                    optimizer.step()
                    lavg += loss.item() * imgi.shape[0]
                    lavg_per += loss_per.item() * imgi.shape[0]

            nsum += len(img)
            nbatch += 1

        if iepoch % 5 == 0 or iepoch < 10:
            lavg = lavg / nsum
            lavg_per = lavg_per / nsum
            if test_data is not None or test_files is not None:
                lavgt, nsum = 0., 0
                np.random.seed(42)
                rperm = np.random.choice(np.arange(0, nimg_test),
                                         size=(nimg_test_per_epoch,), p=test_probs)
                # cycle through noise regimes across evaluation epochs
                inoise = iepoch % nnoise
                torch.manual_seed(inoise)
                for ibatch in range(0, nimg_test_per_epoch, batch_size):
                    inds = rperm[ibatch:ibatch + batch_size]
                    if test_data is None:
                        imgs = [
                            np.maximum(0,
                                       io.imread(test_files[i])[:nchan]) for i in inds
                        ]
                        lbls = [io.imread(test_labels_files[i])[1:] for i in inds]
                    else:
                        imgs = [test_data[i][:nchan] for i in inds]
                        lbls = [test_labels[i][1:] for i in inds]
                    img, lbl, scale = random_rotate_and_resize_noise(
                        imgs, lbls, diam_test[inds].copy(), poisson=poisson[inoise],
                        beta=beta[inoise], blur=blur[inoise], gblur=gblur[inoise],
                        iso=iso, downsample=downsample[inoise], uniform_blur=uniform_blur,
                        diam_mean=diam_mean, ds_max=ds_max, device=device)
                    loss, loss_per = test_loss(net, img[:, :nchan], net1=net1,
                                               img=img[:, nchan:], lbl=lbl, lam=lam)

                    lavgt += loss.item() * img.shape[0]
                    nsum += len(img)
                lavgt = lavgt / nsum
                denoise_logger.info(
                    "Epoch %d, Time %4.1fs, Loss %0.3f, loss_per %0.3f, Loss Test %0.3f, LR %2.4f"
                    % (iepoch, time.time() - tic, lavg, lavg_per, lavgt,
                       learning_rate[iepoch]))
                test_losses.append(lavgt)
            else:
                denoise_logger.info(
                    "Epoch %d, Time %4.1fs, Loss %0.3f, loss_per %0.3f, LR %2.4f" %
                    (iepoch, time.time() - tic, lavg, lavg_per, learning_rate[iepoch]))
            train_losses.append(lavg)

        if save_path is not None:
            if iepoch == n_epochs - 1 or (iepoch % save_every == 0 and iepoch != 0):
                if save_each:  # separate files as model progresses
                    # NOTE: was f"{iepoch:%04d}", an invalid format spec that
                    # raised ValueError whenever save_each was enabled
                    filename0 = str(filename) + f"_epoch_{iepoch:04d}"
                else:
                    filename0 = filename
                denoise_logger.info(f"saving network parameters to {filename0}")
                net.save_model(filename0)
        else:
            filename = save_path

    return filename, train_losses, test_losses
1095
+
1096
+
1097
+ if __name__ == "__main__":
1098
+ import argparse
1099
+ parser = argparse.ArgumentParser(description="cellpose parameters")
1100
+
1101
+ input_img_args = parser.add_argument_group("input image arguments")
1102
+ input_img_args.add_argument("--dir", default=[], type=str,
1103
+ help="folder containing data to run or train on.")
1104
+ input_img_args.add_argument("--img_filter", default=[], type=str,
1105
+ help="end string for images to run on")
1106
+
1107
+ model_args = parser.add_argument_group("model arguments")
1108
+ model_args.add_argument("--pretrained_model", default=[], type=str,
1109
+ help="pretrained denoising model")
1110
+
1111
+ training_args = parser.add_argument_group("training arguments")
1112
+ training_args.add_argument("--test_dir", default=[], type=str,
1113
+ help="folder containing test data (optional)")
1114
+ training_args.add_argument("--file_list", default=[], type=str,
1115
+ help="npy file containing list of train and test files")
1116
+ training_args.add_argument("--seg_model_type", default="cyto2", type=str,
1117
+ help="model to use for seg training loss")
1118
+ training_args.add_argument(
1119
+ "--noise_type", default=[], type=str,
1120
+ help="noise type to use (if input, then other noise params are ignored)")
1121
+ training_args.add_argument("--poisson", default=0.8, type=float,
1122
+ help="fraction of images to add poisson noise to")
1123
+ training_args.add_argument("--beta", default=0.7, type=float,
1124
+ help="scale of poisson noise")
1125
+ training_args.add_argument("--blur", default=0., type=float,
1126
+ help="fraction of images to blur")
1127
+ training_args.add_argument("--gblur", default=1.0, type=float,
1128
+ help="scale of gaussian blurring stddev")
1129
+ training_args.add_argument("--downsample", default=0., type=float,
1130
+ help="fraction of images to downsample")
1131
+ training_args.add_argument("--ds_max", default=7, type=int,
1132
+ help="max downsampling factor")
1133
+ training_args.add_argument("--lam_per", default=1.0, type=float,
1134
+ help="weighting of perceptual loss")
1135
+ training_args.add_argument("--lam_seg", default=1.5, type=float,
1136
+ help="weighting of segmentation loss")
1137
+ training_args.add_argument("--lam_rec", default=0., type=float,
1138
+ help="weighting of reconstruction loss")
1139
+ training_args.add_argument(
1140
+ "--diam_mean", default=30., type=float, help=
1141
+ "mean diameter to resize cells to during training -- if starting from pretrained models it cannot be changed from 30.0"
1142
+ )
1143
+ training_args.add_argument("--learning_rate", default=0.001, type=float,
1144
+ help="learning rate. Default: %(default)s")
1145
+ training_args.add_argument("--n_epochs", default=2000, type=int,
1146
+ help="number of epochs. Default: %(default)s")
1147
+ training_args.add_argument(
1148
+ "--save_each", default=False, action="store_true",
1149
+ help="save each epoch as separate model")
1150
+ training_args.add_argument(
1151
+ "--nimg_per_epoch", default=0, type=int,
1152
+ help="number of images per epoch. Default is length of training images")
1153
+ training_args.add_argument(
1154
+ "--nimg_test_per_epoch", default=0, type=int,
1155
+ help="number of test images per epoch. Default is length of testing images")
1156
+
1157
+ io.logger_setup()
1158
+
1159
+ args = parser.parse_args()
1160
+ lams = [args.lam_per, args.lam_seg, args.lam_rec]
1161
+ print("lam", lams)
1162
+
1163
+ if len(args.noise_type) > 0:
1164
+ noise_type = args.noise_type
1165
+ uniform_blur = False
1166
+ iso = True
1167
+ if noise_type == "poisson":
1168
+ poisson = 0.8
1169
+ blur = 0.
1170
+ downsample = 0.
1171
+ beta = 0.7
1172
+ gblur = 1.0
1173
+ elif noise_type == "blur_expr":
1174
+ poisson = 0.8
1175
+ blur = 0.8
1176
+ downsample = 0.
1177
+ beta = 0.1
1178
+ gblur = 0.5
1179
+ elif noise_type == "blur":
1180
+ poisson = 0.8
1181
+ blur = 0.8
1182
+ downsample = 0.
1183
+ beta = 0.1
1184
+ gblur = 10.0
1185
+ uniform_blur = True
1186
+ elif noise_type == "downsample_expr":
1187
+ poisson = 0.8
1188
+ blur = 0.8
1189
+ downsample = 0.8
1190
+ beta = 0.03
1191
+ gblur = 1.0
1192
+ elif noise_type == "downsample":
1193
+ poisson = 0.8
1194
+ blur = 0.8
1195
+ downsample = 0.8
1196
+ beta = 0.03
1197
+ gblur = 5.0
1198
+ uniform_blur = True
1199
+ elif noise_type == "all":
1200
+ poisson = [0.8, 0.8, 0.8]
1201
+ blur = [0., 0.8, 0.8]
1202
+ downsample = [0., 0., 0.8]
1203
+ beta = [0.7, 0.1, 0.03]
1204
+ gblur = [0., 10.0, 5.0]
1205
+ uniform_blur = True
1206
+ elif noise_type == "aniso":
1207
+ poisson = 0.8
1208
+ blur = 0.8
1209
+ downsample = 0.8
1210
+ beta = 0.1
1211
+ gblur = args.ds_max * 1.5
1212
+ iso = False
1213
+ else:
1214
+ raise ValueError(f"{noise_type} noise_type is not supported")
1215
+ else:
1216
+ poisson, beta = args.poisson, args.beta
1217
+ blur, gblur = args.blur, args.gblur
1218
+ downsample = args.downsample
1219
+
1220
+ pretrained_model = None if len(
1221
+ args.pretrained_model) == 0 else args.pretrained_model
1222
+ model = DenoiseModel(gpu=True, nchan=1, diam_mean=args.diam_mean,
1223
+ pretrained_model=pretrained_model)
1224
+
1225
+ train_data, labels, train_files, train_probs = None, None, None, None
1226
+ test_data, test_labels, test_files, test_probs = None, None, None, None
1227
+ if len(args.file_list) == 0:
1228
+ output = io.load_train_test_data(args.dir, args.test_dir, "_img", "_masks", 0)
1229
+ images, labels, image_names, test_images, test_labels, image_names_test = output
1230
+ train_data = []
1231
+ for i in range(len(images)):
1232
+ img = images[i].astype("float32")
1233
+ if img.ndim > 2:
1234
+ img = img[0]
1235
+ train_data.append(
1236
+ np.maximum(transforms.normalize99(img), 0)[np.newaxis, :, :])
1237
+ if len(args.test_dir) > 0:
1238
+ test_data = []
1239
+ for i in range(len(test_images)):
1240
+ img = test_images[i].astype("float32")
1241
+ if img.ndim > 2:
1242
+ img = img[0]
1243
+ test_data.append(
1244
+ np.maximum(transforms.normalize99(img), 0)[np.newaxis, :, :])
1245
+ save_path = os.path.join(args.dir, "../models/")
1246
+ else:
1247
+ root = args.dir
1248
+ denoise_logger.info(
1249
+ ">>> using file_list (assumes images are normalized and have flows!)")
1250
+ dat = np.load(args.file_list, allow_pickle=True).item()
1251
+ train_files = dat["train_files"]
1252
+ test_files = dat["test_files"]
1253
+ train_probs = dat["train_probs"] if "train_probs" in dat else None
1254
+ test_probs = dat["test_probs"] if "test_probs" in dat else None
1255
+ if str(train_files[0])[:len(str(root))] != str(root):
1256
+ for i in range(len(train_files)):
1257
+ new_path = root / Path(*train_files[i].parts[-3:])
1258
+ if i == 0:
1259
+ print(f"changing path from {train_files[i]} to {new_path}")
1260
+ train_files[i] = new_path
1261
+
1262
+ for i in range(len(test_files)):
1263
+ new_path = root / Path(*test_files[i].parts[-3:])
1264
+ test_files[i] = new_path
1265
+ save_path = os.path.join(args.dir, "models/")
1266
+
1267
+ os.makedirs(save_path, exist_ok=True)
1268
+
1269
+ nimg_per_epoch = None if args.nimg_per_epoch == 0 else args.nimg_per_epoch
1270
+ nimg_test_per_epoch = None if args.nimg_test_per_epoch == 0 else args.nimg_test_per_epoch
1271
+
1272
+ model_path = train(
1273
+ model.net, train_data=train_data, train_labels=labels, train_files=train_files,
1274
+ test_data=test_data, test_labels=test_labels, test_files=test_files,
1275
+ train_probs=train_probs, test_probs=test_probs, poisson=poisson, beta=beta,
1276
+ blur=blur, gblur=gblur, downsample=downsample, ds_max=args.ds_max,
1277
+ iso=iso, uniform_blur=uniform_blur, n_epochs=args.n_epochs,
1278
+ learning_rate=args.learning_rate,
1279
+ lam=lams,
1280
+ seg_model_type=args.seg_model_type, nimg_per_epoch=nimg_per_epoch,
1281
+ nimg_test_per_epoch=nimg_test_per_epoch, save_path=save_path)
1282
+
1283
+
1284
def seg_train_noisy(model, train_data, train_labels, test_data=None, test_labels=None,
                    poisson=0.8, blur=0.0, downsample=0.0, save_path=None,
                    save_every=100, save_each=False, learning_rate=0.2, n_epochs=500,
                    momentum=0.9, weight_decay=0.00001, SGD=True, batch_size=8,
                    nimg_per_epoch=None, diameter=None, rescale=True, z_masking=False,
                    model_name=None):
    """Train a segmentation model on noise-corrupted images.

    Uses model.loss_fn from models.py via model._train_step / model._test_eval;
    data should already be normalized.

    Args:
        model: segmentation model wrapper (provides .net, .device, .diam_mean,
            _set_optimizer/_set_criterion/_train_step/_test_eval helpers).
        train_data / train_labels (list): normalized images and label arrays
            (labels[k][0] = masks, labels[k][1:] = flow targets).
        test_data / test_labels (list, optional): evaluation set.
        poisson, blur, downsample (float): corruption fractions passed to
            random_rotate_and_resize_noise.
        save_path / save_every / save_each / model_name: checkpointing options.
        learning_rate (float or sequence): constant LR or per-epoch schedule
            (sequence length must equal n_epochs).
        momentum, weight_decay, SGD, batch_size, n_epochs: optimizer options.
        nimg_per_epoch (int, optional): images sampled per epoch (capped at the
            dataset size).
        diameter (float, optional): fixed cell diameter; if None it is
            estimated from the training labels.
        rescale (bool): rescale images by diameter during augmentation.
            NOTE(review): if diameter is given and rescale is False, diam_train
            is never defined and the training loop raises NameError -- confirm
            intended usage.
        z_masking (bool): randomly zero leading/trailing channels per sample.

    Returns:
        str: path of the last saved checkpoint (or save_path when None).
    """

    d = datetime.datetime.now()

    model.n_epochs = n_epochs
    if isinstance(learning_rate, (list, np.ndarray)):
        if isinstance(learning_rate, np.ndarray) and learning_rate.ndim > 1:
            raise ValueError("learning_rate.ndim must equal 1")
        elif len(learning_rate) != n_epochs:
            raise ValueError(
                "if learning_rate given as list or np.ndarray it must have length n_epochs"
            )
        model.learning_rate = learning_rate
        model.learning_rate_const = mode(learning_rate)[0][0]
    else:
        model.learning_rate_const = learning_rate
        # set learning rate schedule
        if SGD:
            LR = np.linspace(0, model.learning_rate_const, 10)
            if model.n_epochs > 250:
                LR = np.append(
                    LR, model.learning_rate_const * np.ones(model.n_epochs - 100))
                for i in range(10):
                    LR = np.append(LR, LR[-1] / 2 * np.ones(10))
            else:
                LR = np.append(
                    LR,
                    model.learning_rate_const * np.ones(max(0, model.n_epochs - 10)))
        else:
            LR = model.learning_rate_const * np.ones(model.n_epochs)
        model.learning_rate = LR

    model.batch_size = batch_size
    model._set_optimizer(model.learning_rate[0], momentum, weight_decay, SGD)
    model._set_criterion()

    nimg = len(train_data)

    # compute average cell diameter
    if diameter is None:
        diam_train = np.array(
            [utils.diameters(train_labels[k][0])[0] for k in range(len(train_labels))])
        diam_train_mean = diam_train[diam_train > 0].mean()
        model.diam_labels = diam_train_mean
        if rescale:
            diam_train[diam_train < 5] = 5.
            if test_data is not None:
                diam_test = np.array([
                    utils.diameters(test_labels[k][0])[0]
                    for k in range(len(test_labels))
                ])
                diam_test[diam_test < 5] = 5.
            denoise_logger.info(">>>> median diameter set to = %d" % model.diam_mean)
    elif rescale:
        diam_train_mean = diameter
        model.diam_labels = diameter
        denoise_logger.info(">>>> median diameter set to = %d" % model.diam_mean)
        diam_train = diameter * np.ones(len(train_labels), "float32")
        if test_data is not None:
            diam_test = diameter * np.ones(len(test_labels), "float32")

    denoise_logger.info(
        f">>>> mean of training label mask diameters (saved to model) {diam_train_mean:.3f}"
    )
    model.net.diam_labels.data = torch.ones(1, device=model.device) * diam_train_mean

    nchan = train_data[0].shape[0]
    denoise_logger.info(">>>> training network with %d channel input <<<<" % nchan)
    denoise_logger.info(">>>> LR: %0.5f, batch_size: %d, weight_decay: %0.5f" %
                        (model.learning_rate_const, model.batch_size, weight_decay))

    if test_data is not None:
        denoise_logger.info(f">>>> ntrain = {nimg}, ntest = {len(test_data)}")
    else:
        denoise_logger.info(f">>>> ntrain = {nimg}")

    tic = time.time()

    lavg, nsum = 0, 0

    if save_path is not None:
        _, file_label = os.path.split(save_path)
        file_path = os.path.join(save_path, "models/")

        if not os.path.exists(file_path):
            os.makedirs(file_path)
    else:
        denoise_logger.warning("WARNING: no save_path given, model not saving")

    ksave = 0

    # get indices for each epoch for training
    np.random.seed(0)
    inds_all = np.zeros((0,), "int32")
    if nimg_per_epoch is None or nimg > nimg_per_epoch:
        nimg_per_epoch = nimg
    denoise_logger.info(f">>>> nimg_per_epoch = {nimg_per_epoch}")
    while len(inds_all) < n_epochs * nimg_per_epoch:
        rperm = np.random.permutation(nimg)
        inds_all = np.hstack((inds_all, rperm))

    for iepoch in range(model.n_epochs):
        if SGD:
            model._set_learning_rate(model.learning_rate[iepoch])
        np.random.seed(iepoch)
        rperm = inds_all[iepoch * nimg_per_epoch:(iepoch + 1) * nimg_per_epoch]
        for ibatch in range(0, nimg_per_epoch, batch_size):
            inds = rperm[ibatch:ibatch + batch_size]
            imgi, lbl, scale = random_rotate_and_resize_noise(
                [train_data[i] for i in inds], [train_labels[i][1:] for i in inds],
                poisson=poisson, blur=blur, downsample=downsample,
                diams=diam_train[inds], diam_mean=model.diam_mean)
            imgi = imgi[:, :1]  # keep noisy only
            if z_masking:
                # randomly zero out a prefix/suffix of channels per sample
                nc = imgi.shape[1]
                nb = imgi.shape[0]
                ncmin = (np.random.rand(nb) > 0.25) * (np.random.randint(
                    nc // 2 - 1, size=nb))
                ncmax = nc - (np.random.rand(nb) > 0.25) * (np.random.randint(
                    nc // 2 - 1, size=nb))
                for b in range(nb):
                    imgi[b, :ncmin[b]] = 0
                    imgi[b, ncmax[b]:] = 0

            train_loss = model._train_step(imgi, lbl)
            lavg += train_loss
            nsum += len(imgi)

        if iepoch % 10 == 0 or iepoch == 5:
            lavg = lavg / nsum
            if test_data is not None:
                lavgt, nsum = 0., 0
                np.random.seed(42)
                rperm = np.arange(0, len(test_data), 1, int)
                for ibatch in range(0, len(test_data), batch_size):
                    inds = rperm[ibatch:ibatch + batch_size]
                    imgi, lbl, scale = random_rotate_and_resize_noise(
                        [test_data[i] for i in inds],
                        [test_labels[i][1:] for i in inds], poisson=poisson, blur=blur,
                        downsample=downsample, diams=diam_test[inds],
                        diam_mean=model.diam_mean)
                    imgi = imgi[:, :1]  # keep noisy only
                    test_loss = model._test_eval(imgi, lbl)
                    lavgt += test_loss
                    nsum += len(imgi)

                denoise_logger.info(
                    "Epoch %d, Time %4.1fs, Loss %2.4f, Loss Test %2.4f, LR %2.4f" %
                    (iepoch, time.time() - tic, lavg, lavgt / nsum,
                     model.learning_rate[iepoch]))
            else:
                denoise_logger.info(
                    "Epoch %d, Time %4.1fs, Loss %2.4f, LR %2.4f" %
                    (iepoch, time.time() - tic, lavg, model.learning_rate[iepoch]))

            lavg, nsum = 0, 0

        if save_path is not None:
            if iepoch == model.n_epochs - 1 or iepoch % save_every == 1:
                # save model at the end
                if save_each:  # separate files as model progresses
                    if model_name is None:
                        filename = "{}_{}_{}_{}".format(
                            model.net_type, file_label,
                            d.strftime("%Y_%m_%d_%H_%M_%S.%f"), "epoch_" + str(iepoch))
                    else:
                        filename = "{}_{}".format(model_name, "epoch_" + str(iepoch))
                else:
                    if model_name is None:
                        filename = "{}_{}_{}".format(model.net_type, file_label,
                                                     d.strftime("%Y_%m_%d_%H_%M_%S.%f"))
                    else:
                        filename = model_name
                filename = os.path.join(file_path, filename)
                ksave += 1
                # FIX: restore the path in the log message (it previously
                # printed the literal "(unknown)" instead of the filename)
                denoise_logger.info(f"saving network parameters to {filename}")
                model.net.save_model(filename)
        else:
            filename = save_path

    return filename
models/seg_post_model/cellpose/dynamics.py ADDED
@@ -0,0 +1,691 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Copyright © 2025 Howard Hughes Medical Institute, Authored by Carsen Stringer , Michael Rariden and Marius Pachitariu.
3
+ """
4
+ import os
5
+ from scipy.ndimage import find_objects, center_of_mass, mean
6
+ import torch
7
+ import numpy as np
8
+ import tifffile
9
+ from tqdm import trange
10
+ import fastremap
11
+
12
+ import logging
13
+
14
+ dynamics_logger = logging.getLogger(__name__)
15
+
16
+ from . import utils
17
+
18
+ import torch
19
+ import torch.nn.functional as F
20
+
21
def _extend_centers_gpu(neighbors, meds, isneighbor, shape, n_iter=200,
                        device=torch.device("cpu")):
    """Runs diffusion from mask centers to generate flows for training images or quality control.

    Each iteration deposits one unit of "heat" at every mask center and then
    replaces every masked pixel by the mean over its valid neighbors; the
    flows are differences of the resulting heat map between opposing neighbors.

    Args:
        neighbors (torch.Tensor): index tensor for the 9 neighbors of each
            pixel in the masks, one leading row of indices per spatial
            dimension (used as ``T[tuple(neighbors)]``).
        meds (torch.Tensor): Mask center coordinates, one row per center.
        isneighbor (torch.Tensor): Valid neighbor boolean, 9 x pixels (True
            where the neighbor belongs to the same mask).
        shape (tuple): Shape of the diffusion tensor to allocate.
        n_iter (int, optional): Number of diffusion iterations. Defaults to 200.
        device (torch.device, optional): Device to run the computation on.
            Defaults to torch.device("cpu").

    Returns:
        np.ndarray: Flow components stacked on axis -2 -- (dy, dx) in 2D,
        (dz, dy, dx) in 3D -- moved to the CPU. (Despite the variable name
        ``mu_torch``, np.stack returns a NumPy array.)
    """
    # float32 for very large tensors or on MPS devices, float64 otherwise
    # (NOTE(review): presumably float64 is unsupported/slow on MPS -- confirm)
    if torch.prod(torch.tensor(shape)) > 4e7 or device.type == "mps":
        T = torch.zeros(shape, dtype=torch.float, device=device)
    else:
        T = torch.zeros(shape, dtype=torch.double, device=device)

    for i in range(n_iter):
        # re-inject heat at the mask centers every iteration
        T[tuple(meds.T)] += 1
        Tneigh = T[tuple(neighbors)]
        # zero contributions from neighbors outside the mask
        Tneigh *= isneighbor
        # diffuse: each pixel becomes the mean of its neighborhood
        T[tuple(neighbors[:, 0])] = Tneigh.mean(axis=0)
    del meds, isneighbor, Tneigh

    if T.ndim == 2:
        # 2D: gradients from differences between opposing neighbors
        # (positions 2/1 along y, 4/3 along x)
        grads = T[neighbors[0, [2, 1, 4, 3]], neighbors[1, [2, 1, 4, 3]]]
        del neighbors
        dy = grads[0] - grads[1]
        dx = grads[2] - grads[3]
        del grads
        mu_torch = np.stack((dy.cpu().squeeze(0), dx.cpu().squeeze(0)), axis=-2)
    else:
        # 3D: differences of the first six non-center neighbors give z, y, x
        grads = T[tuple(neighbors[:, 1:])]
        del neighbors
        dz = grads[0] - grads[1]
        dy = grads[2] - grads[3]
        dx = grads[4] - grads[5]
        del grads
        mu_torch = np.stack(
            (dz.cpu().squeeze(0), dy.cpu().squeeze(0), dx.cpu().squeeze(0)), axis=-2)
    return mu_torch
66
+
67
def center_of_mass(mask):
    """Return an integer (y, x) center for a binary mask, guaranteed to lie on the mask.

    The rounded mean coordinate is used when it falls on a mask pixel;
    otherwise the mask pixel closest to the rounded mean is returned.
    """
    rows, cols = np.nonzero(mask)
    cy = int(np.round(rows.sum() / len(rows)))
    cx = int(np.round(cols.sum() / len(cols)))
    mean_on_mask = ((rows == cy) * (cols == cx)).sum()
    if not mean_on_mask:
        # fall back to the mask pixel nearest to (cy, cx)
        nearest = ((cols - cx) ** 2 + (rows - cy) ** 2).argmin()
        cy = rows[nearest]
        cx = cols[nearest]

    return cy, cx
78
+
79
def get_centers(masks, slices):
    """Compute per-mask center coordinates and extents.

    Args:
        masks: labelled 2D array, 0=background, 1..N = mask labels.
        slices: per-label bounding-box slices as returned by
            scipy.ndimage.find_objects (one entry per label, in order).

    Returns:
        (centers, exts): centers is an (N, 2) int array of absolute (y, x)
        mask centers; exts is an (N,) array of bounding-box extents
        (height + width + 2), used to size the diffusion iteration count.
    """
    abs_centers = []
    for label_idx, slc in enumerate(slices):
        cy, cx = center_of_mass(masks[slc] == (label_idx + 1))
        # shift from bounding-box-local to absolute image coordinates
        abs_centers.append(np.array([cy + slc[0].start, cx + slc[1].start]))
    exts = np.array([(slc[0].stop - slc[0].start) +
                     (slc[1].stop - slc[1].start) + 2 for slc in slices])
    return np.array(abs_centers), exts
85
+
86
+
87
def masks_to_flows_gpu(masks, device=torch.device("cpu"), niter=None):
    """Convert masks to flows using diffusion from center pixel.

    Center of masks where diffusion starts is defined by the mask pixel closest
    to the within-mask center of mass.

    Args:
        masks (int, 2D array): Labelled masks. 0=NO masks; 1,2,...=mask labels.
        device (torch.device, optional): The device to run the computation on.
            Defaults to torch.device("cpu"); if explicitly passed as None, cuda
            or mps is selected when available.
        niter (int, optional): Number of iterations for the diffusion process.
            Defaults to None (2x the largest mask extent is used).

    Returns:
        np.ndarray: float 3D array of flows [2 x Ly x Lx] (dy, dx),
        unit-length inside masks and zero outside.
    """
    if device is None:
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('mps') if torch.backends.mps.is_available() else None

    if masks.max() > 0:
        Ly0, Lx0 = masks.shape
        Ly, Lx = Ly0 + 2, Lx0 + 2  # padded size (1-pixel border)

        masks_padded = torch.from_numpy(masks.astype("int64")).to(device)
        masks_padded = F.pad(masks_padded, (1, 1, 1, 1))
        shape = masks_padded.shape

        ### get mask pixel neighbors (center pixel + 8-connectivity offsets)
        y, x = torch.nonzero(masks_padded, as_tuple=True)
        y = y.int()
        x = x.int()
        neighbors = torch.zeros((2, 9, y.shape[0]), dtype=torch.int, device=device)
        yxi = [[0, -1, 1, 0, 0, -1, -1, 1, 1], [0, 0, 0, -1, 1, -1, 1, -1, 1]]
        for i in range(9):
            neighbors[0, i] = y + yxi[0][i]
            neighbors[1, i] = x + yxi[1][i]
        # a neighbor is valid only if it carries the same mask label as the center
        isneighbor = torch.ones((9, y.shape[0]), dtype=torch.bool, device=device)
        m0 = masks_padded[neighbors[0, 0], neighbors[1, 0]]
        for i in range(1, 9):
            isneighbor[i] = masks_padded[neighbors[0, i], neighbors[1, i]] == m0
        del m0, masks_padded

        ### get center-of-mass within cell
        slices = find_objects(masks)
        centers, ext = get_centers(masks, slices)
        meds_p = torch.from_numpy(centers).to(device).long()
        meds_p += 1  # for padding

        ### run diffusion
        n_iter = 2 * ext.max() if niter is None else niter
        mu = _extend_centers_gpu(neighbors, meds_p, isneighbor, shape, n_iter=n_iter,
                                 device=device)
        mu = mu.astype("float64")

        # new normalization: make flow vectors unit length
        mu /= (1e-60 + (mu**2).sum(axis=0)**0.5)

        # put into original image (undo the 1-pixel padding offset)
        mu0 = np.zeros((2, Ly0, Lx0))
        mu0[:, y.cpu().numpy() - 1, x.cpu().numpy() - 1] = mu
    else:
        # no masks, return empty flows
        mu0 = np.zeros((2, masks.shape[0], masks.shape[1]))
    return mu0
153
+
154
def masks_to_flows_gpu_3d(masks, device=None, niter=None):
    """Convert 3D masks to flows using diffusion from center pixel.

    Args:
        masks (int, 3D array): Labelled masks. 0=NO masks; 1,2,...=mask labels.
        device (torch.device, optional): The device to run the computation on.
            Defaults to None (cuda or mps is selected when available).
        niter (int, optional): Number of iterations for the diffusion process.
            Defaults to None (6x the largest summed bounding-box extent is used).

    Returns:
        np.ndarray: A 4D array [3 x Lz x Ly x Lx] representing the flows for
        each pixel in Z, Y and X, zero outside masks.
    """
    if device is None:
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('mps') if torch.backends.mps.is_available() else None

    Lz0, Ly0, Lx0 = masks.shape
    Lz, Ly, Lx = Lz0 + 2, Ly0 + 2, Lx0 + 2  # padded size (1-pixel border)

    masks_padded = torch.from_numpy(masks.astype("int64")).to(device)
    masks_padded = F.pad(masks_padded, (1, 1, 1, 1, 1, 1))

    # get mask pixel neighbors (center pixel + 6 face neighbors)
    z, y, x = torch.nonzero(masks_padded).T
    neighborsZ = torch.stack((z, z + 1, z - 1, z, z, z, z))
    neighborsY = torch.stack((y, y, y, y + 1, y - 1, y, y), axis=0)
    neighborsX = torch.stack((x, x, x, x, x, x + 1, x - 1), axis=0)

    neighbors = torch.stack((neighborsZ, neighborsY, neighborsX), axis=0)

    # get mask centers: the mask pixel closest to the mask's mean coordinate
    slices = find_objects(masks)

    centers = np.zeros((masks.max(), 3), "int")
    for i, si in enumerate(slices):
        if si is not None:
            sz, sy, sx = si
            zi, yi, xi = np.nonzero(masks[sz, sy, sx] == (i + 1))
            zi = zi.astype(np.int32) + 1  # add padding
            yi = yi.astype(np.int32) + 1  # add padding
            xi = xi.astype(np.int32) + 1  # add padding
            zmed = np.mean(zi)
            ymed = np.mean(yi)
            xmed = np.mean(xi)
            imin = np.argmin((zi - zmed)**2 + (xi - xmed)**2 + (yi - ymed)**2)
            zmed = zi[imin]
            ymed = yi[imin]
            xmed = xi[imin]
            centers[i, 0] = zmed + sz.start
            centers[i, 1] = ymed + sy.start
            centers[i, 2] = xmed + sx.start

    # get neighbor validator (not all neighbors are in same mask)
    neighbor_masks = masks_padded[tuple(neighbors)]
    isneighbor = neighbor_masks == neighbor_masks[0]
    ext = np.array(
        [[sz.stop - sz.start + 1, sy.stop - sy.start + 1, sx.stop - sx.start + 1]
         for sz, sy, sx in slices])
    n_iter = 6 * (ext.sum(axis=1)).max() if niter is None else niter

    # run diffusion
    shape = masks_padded.shape
    mu = _extend_centers_gpu(neighbors, centers, isneighbor, shape, n_iter=n_iter,
                             device=device)
    # normalize flow vectors to unit length
    mu /= (1e-60 + (mu**2).sum(axis=0)**0.5)

    # put into original image (undo the 1-pixel padding offset)
    mu0 = np.zeros((3, Lz0, Ly0, Lx0))
    mu0[:, z.cpu().numpy() - 1, y.cpu().numpy() - 1, x.cpu().numpy() - 1] = mu
    return mu0
224
+
225
def labels_to_flows(labels, files=None, device=None, redo_flows=False, niter=None,
                    return_flows=True):
    """Converts labels (list of masks or flows) to flows for training model.

    Args:
        labels (list of ND-arrays): The labels to convert. labels[k] can be 2D or 3D. If [3 x Ly x Lx],
            it is assumed that flows were precomputed. Otherwise, labels[k][0] or labels[k] (if 2D)
            is used to create flows and cell probabilities.
        files (list of str, optional): The files to save the flows to. If provided, flows are saved to
            files to be reused. Defaults to None.
        device (str, optional): The device to use for computation. Defaults to None.
        redo_flows (bool, optional): Whether to recompute the flows. Defaults to False.
        niter (int, optional): The number of iterations for computing flows. Defaults to None.
        return_flows (bool, optional): Whether to return the computed flows; set False to
            only write them to disk (with files). Defaults to True.

    Returns:
        list of [4 x Ly x Lx] arrays: The flows for training the model. flows[k][0] is labels[k],
        flows[k][1] is the binary cell mask (labels > 0.5), flows[k][2] is Y flow,
        and flows[k][3] is X flow.
    """
    nimg = len(labels)
    if labels[0].ndim < 3:
        labels = [labels[n][np.newaxis, :, :] for n in range(nimg)]

    flows = []
    # flows need to be recomputed
    if labels[0].shape[0] == 1 or labels[0].ndim < 3 or redo_flows:
        dynamics_logger.info("computing flows for labels")

        # compute flows; labels are fixed here to be unique, so they need to be passed back
        # make sure labels are unique!
        labels = [fastremap.renumber(label, in_place=True)[0] for label in labels]
        iterator = trange if nimg > 1 else range
        for n in iterator(nimg):
            labels[n][0] = fastremap.renumber(labels[n][0], in_place=True)[0]
            vecn = masks_to_flows_gpu(labels[n][0].astype(int), device=device, niter=niter)

            # concatenate labels, binary cell mask, and the 2-component vector flows
            flow = np.concatenate((labels[n], labels[n] > 0.5, vecn),
                                  axis=0).astype(np.float32)
            if files is not None:
                file_name = os.path.splitext(files[n])[0]
                tifffile.imwrite(file_name + "_flows.tif", flow)
            if return_flows:
                flows.append(flow)
    else:
        dynamics_logger.info("flows precomputed")
        if return_flows:
            flows = [labels[n].astype(np.float32) for n in range(nimg)]
    return flows
274
+
275
+
276
def flow_error(maski, dP_net, device=None):
    """Error in flows from predicted masks vs flows predicted by network run on image.

    This function serves to benchmark the quality of masks. It works as follows:
    1. The predicted masks are used to create a flow diagram.
    2. The mask-flows are compared to the flows that the network predicted.

    If there is a discrepancy between the flows, it suggests that the mask is incorrect.
    Masks with flow_errors greater than 0.4 are discarded by default. This setting can be
    changed in Cellpose.eval or CellposeModel.eval.

    Args:
        maski (np.ndarray, int): Masks produced from running dynamics on dP_net, where 0=NO masks; 1,2... are mask labels.
        dP_net (np.ndarray, float): ND flows where dP_net.shape[1:] = maski.shape.
        device (torch.device, optional): Device used to recompute flows from the masks.

    Returns:
        A tuple containing (flow_errors, dP_masks): flow_errors (np.ndarray, float): Mean squared error between predicted flows and flows from masks;
        dP_masks (np.ndarray, float): ND flows produced from the predicted masks.
        Returns None when dP_net and maski shapes are incompatible.
    """
    if dP_net.shape[1:] != maski.shape:
        # was a bare print(); use the module logger like the rest of the file
        dynamics_logger.error("net flow is not same size as predicted masks")
        return None

    # flows predicted from estimated masks
    dP_masks = masks_to_flows_gpu(maski, device=device)
    # difference between predicted flows vs mask flows;
    # dP_net is divided by 5., matching the scaling applied in compute_masks
    flow_errors = np.zeros(maski.max())
    for i in range(dP_masks.shape[0]):
        flow_errors += mean((dP_masks[i] - dP_net[i] / 5.)**2, maski,
                            index=np.arange(1,
                                            maski.max() + 1))

    return flow_errors, dP_masks
309
+
310
+
311
def steps_interp(dP, inds, niter, device=torch.device("cpu")):
    """ Run dynamics of pixels to recover masks in 2D/3D, with interpolation between pixel values.

    Euler integration of dynamics dP for niter steps. The flow field is sampled
    at the (sub-pixel) current positions using torch grid_sample.

    Args:
        dP (numpy.ndarray): Array of shape (2, Ly, Lx) or (3, Lz, Ly, Lx) representing the flow field.
        inds (tuple of numpy.ndarray): Initial pixel coordinates, as returned by np.nonzero.
        niter (int): Number of iterations to perform.
        device (torch.device, optional): Device to use for computation. Defaults to torch.device("cpu").

    Returns:
        torch.Tensor: Final pixel locations, shape (ndim, n_points), in (z,)y,x order.
    """

    shape = dP.shape[1:]
    ndim = len(shape)

    # pt holds positions in grid_sample's channel order (x, y[, z]) — hence the
    # reversed index ndim - n - 1 below
    pt = torch.zeros((*[1]*ndim, len(inds[0]), ndim), dtype=torch.float32, device=device)
    im = torch.zeros((1, ndim, *shape), dtype=torch.float32, device=device)
    # Y and X dimensions, flipped X-1, Y-1
    # pt is [1 1 1 3 n_points]
    for n in range(ndim):
        if ndim==3:
            pt[0, 0, 0, :, ndim - n - 1] = torch.from_numpy(inds[n]).to(device, dtype=torch.float32)
        else:
            pt[0, 0, :, ndim - n - 1] = torch.from_numpy(inds[n]).to(device, dtype=torch.float32)
        im[0, ndim - n - 1] = torch.from_numpy(dP[n]).to(device, dtype=torch.float32)
    shape = np.array(shape)[::-1].astype("float") - 1

    # normalize pt between 0 and 1, normalize the flow
    for k in range(ndim):
        im[:, k] *= 2. / shape[k]
        pt[..., k] /= shape[k]

    # normalize to between -1 and 1 (the coordinate range grid_sample expects)
    pt *= 2
    pt -= 1

    # dynamics
    for t in range(niter):
        dPt = torch.nn.functional.grid_sample(im, pt, align_corners=False)
        for k in range(ndim): #clamp the final pixel locations
            pt[..., k] = torch.clamp(pt[..., k] + dPt[:, k], -1., 1.)

    #undo the normalization from before, reverse order of operations
    pt += 1
    pt *= 0.5
    for k in range(ndim):
        pt[..., k] *= shape[k]

    # flip back to (z,)y,x coordinate order; keep a 2D result even for a single point
    if ndim==3:
        pt = pt[..., [2, 1, 0]].squeeze()
        pt = pt.unsqueeze(0) if pt.ndim==1 else pt
        return pt.T
    else:
        pt = pt[..., [1, 0]].squeeze()
        pt = pt.unsqueeze(0) if pt.ndim==1 else pt
        return pt.T
374
+
375
def follow_flows(dP, inds, niter=200, device=torch.device("cpu")):
    """ Run dynamics to recover masks in 2D or 3D.

    Only the pixels listed in inds (typically those above the cell-probability
    threshold) are advected through the flow field.

    Args:
        dP (np.ndarray): Flows [axis x Ly x Lx] or [axis x Lz x Ly x Lx].
        inds (tuple of np.ndarray): Coordinates of the pixels to run dynamics on,
            as returned by np.nonzero.
        niter (int, optional): Number of iterations of dynamics to run. Default is 200.
        device (torch.device, optional): Device to use for computation.
            Default is torch.device("cpu").

    Returns:
        torch.Tensor: Final locations of each pixel after dynamics,
        shape (ndim, n_points).
    """
    # thin wrapper kept for API compatibility; unused locals removed
    return steps_interp(dP, inds, niter, device=device)
398
+
399
+
400
def remove_bad_flow_masks(masks, flows, threshold=0.4, device=torch.device("cpu")):
    """Remove masks which have inconsistent flows.

    Uses metrics.flow_error to compute flows from predicted masks
    and compare flows to predicted flows from the network. Discards
    masks with flow errors greater than the threshold.

    Args:
        masks (int, 2D or 3D array): Labelled masks, 0=NO masks; 1,2,...=mask labels,
            size [Ly x Lx] or [Lz x Ly x Lx].
        flows (float, 3D or 4D array): Flows [axis x Ly x Lx] or [axis x Lz x Ly x Lx].
        threshold (float, optional): Masks with flow error greater than threshold are discarded.
            Default is 0.4.
        device (torch.device, optional): Device used to recompute flows; falls back to
            CPU when GPU memory is insufficient for very large images.

    Returns:
        masks (int, 2D or 3D array): Masks with inconsistent flow masks removed,
        0=NO masks; 1,2,...=mask labels, size [Ly x Lx] or [Lz x Ly x Lx].
    """
    device0 = device
    # for very large images, check whether the flow computation fits in GPU memory
    if masks.size > 10000 * 10000 and (device is not None and device.type == "cuda"):

        major_version, minor_version = torch.__version__.split(".")[:2]
        torch.cuda.empty_cache()
        if major_version == "1" and int(minor_version) < 10:
            # for PyTorch version lower than 1.10 (no mem_get_info)
            def mem_info():
                total_mem = torch.cuda.get_device_properties(device0.index).total_memory
                used_mem = torch.cuda.memory_allocated(device0.index)
                free_mem = total_mem - used_mem
                return total_mem, free_mem
        else:
            # for PyTorch version 1.10 and above
            def mem_info():
                free_mem, total_mem = torch.cuda.mem_get_info(device0.index)
                return total_mem, free_mem
        total_mem, free_mem = mem_info()
        if masks.size * 32 > free_mem:
            dynamics_logger.warning(
                "WARNING: image is very large, not using gpu to compute flows from masks for QC step flow_threshold"
            )
            dynamics_logger.info("turn off QC step with flow_threshold=0 if too slow")
            device0 = torch.device("cpu")

    merrors, _ = flow_error(masks, flows, device0)
    # labels are 1-based while the error array is 0-based, hence the +1
    badi = 1 + (merrors > threshold).nonzero()[0]
    masks[np.isin(masks, badi)] = 0
    return masks
447
+
448
+
449
def max_pool1d(h, kernel_size=5, axis=1, out=None):
    """ memory efficient max_pool thanks to Mark Kittisopikul

    Equivalent to a max filter with stride=1 and padding=kernel_size//2;
    requires odd kernel_size >= 3. Border windows are truncated (the max is
    taken only over the in-bounds part of the window).

    Args:
        h (torch.Tensor): Input tensor with a leading batch dimension.
        kernel_size (int, optional): Odd window size >= 3. Defaults to 5.
        axis (int, optional): Axis to pool over; must be 1, 2 or 3. Defaults to 1.
        out (torch.Tensor, optional): Preallocated output buffer, overwritten in
            place when provided. Defaults to None (a fresh clone of h is used).

    Returns:
        torch.Tensor: Max-pooled tensor, same shape as h.

    Raises:
        ValueError: If axis is not 1, 2 or 3 (previously this failed with a
            confusing NameError).
    """
    if axis not in (1, 2, 3):
        raise ValueError(f"axis must be 1, 2 or 3, got {axis}")
    if out is None:
        out = h.clone()
    else:
        out.copy_(h)

    nd = h.shape[axis]
    k0 = kernel_size // 2
    for d in range(-k0, k0+1):
        # shifted views: mv (write) over out, hv (read) over the untouched h;
        # reading from h avoids cascading maxima across passes
        if axis==1:
            mv = out[:, max(-d,0):min(nd-d,nd)]
            hv = h[:, max(d,0):min(nd+d,nd)]
        elif axis==2:
            mv = out[:, :, max(-d,0):min(nd-d,nd)]
            hv = h[:, :, max(d,0):min(nd+d,nd)]
        else:
            mv = out[:, :, :, max(-d,0):min(nd-d,nd)]
            hv = h[:, :, :, max(d,0):min(nd+d,nd)]
        torch.maximum(mv, hv, out=mv)
    return out
474
+
475
def max_pool_nd(h, kernel_size=5):
    """Memory-efficient max pooling over the 2 or 3 spatial axes of h.

    h has a leading batch dimension followed by 2 or 3 spatial axes; the
    pooling is applied separately along each spatial axis, which for a box
    kernel equals the full ND max pool.
    """
    spatial_dims = h.ndim - 1
    pooled_a = max_pool1d(h, kernel_size=kernel_size, axis=1)
    pooled_b = max_pool1d(pooled_a, kernel_size=kernel_size, axis=2)
    if spatial_dims == 2:
        del pooled_a
        return pooled_b
    # 3D: reuse the first intermediate as the output buffer to save memory
    pooled_a = max_pool1d(pooled_b, kernel_size=kernel_size, axis=3, out=pooled_a)
    del pooled_b
    return pooled_a
487
+
488
def get_masks_torch(pt, inds, shape0, rpad=20, max_size_fraction=0.4):
    """Create masks using pixel convergence after running dynamics.

    Makes a histogram of final pixel locations pt, initializes masks
    at peaks of the histogram and extends the masks from the peaks so that
    they include all pixels with more than 2 final pixels. Masks larger than
    max_size_fraction of the image are discarded.

    Parameters:
        pt (torch.Tensor, int): Final locations of each pixel after dynamics,
            size [ndim x n_points] (modified in place by the padding offset).
        inds (tuple of np.ndarray): Original coordinates of the advected pixels,
            as returned by np.nonzero.
        shape0 (tuple): Shape of the original (unpadded) image.
        rpad (int, optional): Histogram edge padding. Default is 20.
            NOTE(review): values < 5 are untested — seed neighborhoods extend
            5 pixels past each seed.
        max_size_fraction (float, optional): Masks larger than max_size_fraction of
            total image size are removed. Default is 0.4.

    Returns:
        M0 (int, 2D or 3D array): Masks, 0=NO masks; 1,2,...=mask labels,
        size [Ly x Lx] or [Lz x Ly x Lx].
    """

    ndim = len(shape0)
    device = pt.device

    # BUGFIX: rpad used to be hard-coded to 20 here, silently ignoring the
    # parameter; the parameter is now honored (default unchanged)
    pt += rpad
    pt = torch.clamp(pt, min=0)
    for i in range(len(pt)):
        pt[i] = torch.clamp(pt[i], max=shape0[i]+rpad-1)

    # # add extra padding to make divisible by 5
    # shape = tuple((np.ceil((shape0 + 2*rpad)/5) * 5).astype(int))
    shape = tuple(np.array(shape0) + 2*rpad)

    # histogram of final pixel locations, built as a sparse COO tensor
    coo = torch.sparse_coo_tensor(pt, torch.ones(pt.shape[1], device=pt.device, dtype=torch.int),
                                  shape)
    h1 = coo.to_dense()
    del coo

    # seeds = local maxima of the histogram with more than 10 converged pixels
    hmax1 = max_pool_nd(h1.unsqueeze(0), kernel_size=5)
    hmax1 = hmax1.squeeze()
    seeds1 = torch.nonzero((h1 - hmax1 > -1e-6) * (h1 > 10))
    del hmax1
    if len(seeds1) == 0:
        dynamics_logger.warning("no seeds found in get_masks_torch - no masks found.")
        return np.zeros(shape0, dtype="uint16")

    # process seeds in order of increasing histogram count
    npts = h1[tuple(seeds1.T)]
    isort1 = npts.argsort()
    seeds1 = seeds1[isort1]

    # extract the 11x11(x11) histogram neighborhood around each seed
    n_seeds = len(seeds1)
    h_slc = torch.zeros((n_seeds, *[11]*ndim), device=seeds1.device)
    for k in range(n_seeds):
        slc = tuple([slice(seeds1[k][j]-5, seeds1[k][j]+6) for j in range(ndim)])
        h_slc[k] = h1[slc]
    del h1
    # grow each seed inside its neighborhood
    seed_masks = torch.zeros((n_seeds, *[11]*ndim), device=seeds1.device)
    if ndim==2:
        seed_masks[:,5,5] = 1
    else:
        seed_masks[:,5,5,5] = 1

    for _ in range(5):
        # extend to adjacent cells that attracted more than 2 pixels
        seed_masks = max_pool_nd(seed_masks, kernel_size=3)
        seed_masks *= h_slc > 2
    del h_slc
    seeds_new = [tuple((torch.nonzero(seed_masks[k]) + seeds1[k] - 5).T)
                 for k in range(n_seeds)]
    del seed_masks

    # paint labels into the padded histogram space
    dtype = torch.int32 if n_seeds < 2**16 else torch.int64
    M1 = torch.zeros(shape, dtype=dtype, device=device)
    for k in range(n_seeds):
        M1[seeds_new[k]] = 1 + k

    # read off the label each advected pixel landed on
    M1 = M1[tuple(pt)]
    M1 = M1.cpu().numpy()

    # scatter labels back to the original pixel positions
    dtype = "uint16" if n_seeds < 2**16 else "uint32"
    M0 = np.zeros(shape0, dtype=dtype)
    M0[inds] = M1

    # remove big masks
    uniq, counts = fastremap.unique(M0, return_counts=True)
    big = np.prod(shape0) * max_size_fraction
    bigc = uniq[counts > big]
    if len(bigc) > 0 and (len(bigc) > 1 or bigc[0] != 0):
        M0 = fastremap.mask(M0, bigc)
    fastremap.renumber(M0, in_place=True) #convenient to guarantee non-skipped labels
    M0 = M0.reshape(tuple(shape0))

    #print(f"mem used: {torch.cuda.memory_allocated()/1e9:.3f} gb, max mem used: {torch.cuda.max_memory_allocated()/1e9:.3f} gb")
    return M0
585
+
586
+
587
def resize_and_compute_masks(dP, cellprob, niter=200, cellprob_threshold=0.0,
                             flow_threshold=0.4, do_3D=False, min_size=15,
                             max_size_fraction=0.4, resize=None, device=torch.device("cpu")):
    """Compute masks using dynamics from dP and cellprob, then fill holes and
    remove masks smaller than min_size.

    Args:
        dP (numpy.ndarray): The dynamics flow field array.
        cellprob (numpy.ndarray): The cell probability array.
        niter (int, optional): The number of iterations for mask computation. Defaults to 200.
        cellprob_threshold (float, optional): The threshold for cell probability. Defaults to 0.0.
        flow_threshold (float, optional): The threshold for quality control metrics. Defaults to 0.4.
        do_3D (bool, optional): Whether to perform mask computation in 3D. Defaults to False.
        min_size (int, optional): The minimum size of the masks. Defaults to 15.
        max_size_fraction (float, optional): Masks larger than max_size_fraction of
            total image size are removed. Default is 0.4.
        resize (tuple, optional): Deprecated; setting it only triggers a warning. Defaults to None.
        device (torch.device, optional): The device to use for computation. Defaults to torch.device("cpu").

    Returns:
        numpy.ndarray: The computed masks.
    """
    mask = compute_masks(dP, cellprob, niter=niter,
                         cellprob_threshold=cellprob_threshold,
                         flow_threshold=flow_threshold, do_3D=do_3D,
                         max_size_fraction=max_size_fraction,
                         device=device)

    if resize is not None:
        # fixed typo in the warning message ("depricated" -> "deprecated")
        dynamics_logger.warning("Resizing is deprecated in v4.0.1+")

    mask = utils.fill_holes_and_remove_small_masks(mask, min_size=min_size)

    return mask
622
+
623
+
624
def compute_masks(dP, cellprob, p=None, niter=200, cellprob_threshold=0.0,
                  flow_threshold=0.4, do_3D=False, min_size=-1,
                  max_size_fraction=0.4, device=torch.device("cpu")):
    """Compute masks using dynamics from dP and cellprob.

    Args:
        dP (numpy.ndarray): The dynamics flow field array.
        cellprob (numpy.ndarray): The cell probability array.
        p (numpy.ndarray, optional): Unused; kept for backward compatibility. Defaults to None.
        niter (int, optional): The number of iterations for mask computation. Defaults to 200.
        cellprob_threshold (float, optional): The threshold for cell probability. Defaults to 0.0.
        flow_threshold (float, optional): The threshold for quality control metrics;
            set to None or <= 0 to skip the flow QC step. Defaults to 0.4.
        do_3D (bool, optional): Whether to perform mask computation in 3D (skips flow QC). Defaults to False.
        min_size (int, optional): The minimum size of the masks; cleanup is skipped
            when <= 0. Defaults to -1.
        max_size_fraction (float, optional): Masks larger than max_size_fraction of
            total image size are removed. Default is 0.4.
        device (torch.device, optional): The device to use for computation. Defaults to torch.device("cpu").

    Returns:
        numpy.ndarray: The computed masks (uint16, or uint32 when >= 2**16 masks).
    """

    if (cellprob > cellprob_threshold).sum(): #mask at this point is a cell cluster binary map, not labels
        inds = np.nonzero(cellprob > cellprob_threshold)
        # defensive check; unreachable in practice since the surrounding sum() is nonzero
        if len(inds[0]) == 0:
            dynamics_logger.info("No cell pixels found.")
            shape = cellprob.shape
            mask = np.zeros(shape, "uint16")
            return mask

        # advect above-threshold pixels through the (rescaled) flow field
        p_final = follow_flows(dP * (cellprob > cellprob_threshold) / 5.,
                               inds=inds, niter=niter,
                               device=device)
        if not torch.is_tensor(p_final):
            p_final = torch.from_numpy(p_final).to(device, dtype=torch.int)
        else:
            p_final = p_final.int()
        # calculate masks
        if device.type == "mps":
            # mask assembly is not supported on mps; move to cpu first
            p_final = p_final.to(torch.device("cpu"))
        mask = get_masks_torch(p_final, inds, dP.shape[1:],
                               max_size_fraction=max_size_fraction)
        del p_final
        # flow thresholding factored out of get_masks
        if not do_3D:
            if mask.max() > 0 and flow_threshold is not None and flow_threshold > 0:
                # make sure labels are unique at output of get_masks
                mask = remove_bad_flow_masks(mask, dP, threshold=flow_threshold,
                                             device=device)

        if mask.max() < 2**16 and mask.dtype != "uint16":
            mask = mask.astype("uint16")

    else:  # nothing to compute, just make it compatible
        dynamics_logger.info("No cell pixels found.")
        shape = cellprob.shape
        mask = np.zeros(cellprob.shape, "uint16")
        return mask

    if min_size > 0:
        mask = utils.fill_holes_and_remove_small_masks(mask, min_size=min_size)

    if mask.dtype == np.uint32:
        dynamics_logger.warning(
            "more than 65535 masks in image, masks returned as np.uint32")

    return mask
models/seg_post_model/cellpose/export.py ADDED
@@ -0,0 +1,405 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Auxiliary module for bioimageio format export
2
+
3
+ Example usage:
4
+
5
+ ```bash
6
+ #!/bin/bash
7
+
8
+ # Define default paths and parameters
9
+ DEFAULT_CHANNELS="1 0"
10
+ DEFAULT_PATH_PRETRAINED_MODEL="/home/qinyu/models/cp/cellpose_residual_on_style_on_concatenation_off_1135_rest_2023_05_04_23_41_31.252995"
11
+ DEFAULT_PATH_README="/home/qinyu/models/cp/README.md"
12
+ DEFAULT_LIST_PATH_COVER_IMAGES="/home/qinyu/images/cp/cellpose_raw_and_segmentation.jpg /home/qinyu/images/cp/cellpose_raw_and_probability.jpg /home/qinyu/images/cp/cellpose_raw.jpg"
13
+ DEFAULT_MODEL_ID="philosophical-panda"
14
+ DEFAULT_MODEL_ICON="🐼"
15
+ DEFAULT_MODEL_VERSION="0.1.0"
16
+ DEFAULT_MODEL_NAME="My Cool Cellpose"
17
+ DEFAULT_MODEL_DOCUMENTATION="A cool Cellpose model trained for my cool dataset."
18
+ DEFAULT_MODEL_AUTHORS='[{"name": "Qin Yu", "affiliation": "EMBL", "github_user": "qin-yu", "orcid": "0000-0002-4652-0795"}]'
19
+ DEFAULT_MODEL_CITE='[{"text": "For more details of the model itself, see the manuscript", "doi": "10.1242/dev.202800", "url": null}]'
20
+ DEFAULT_MODEL_TAGS="cellpose 3d 2d"
21
+ DEFAULT_MODEL_LICENSE="MIT"
22
+ DEFAULT_MODEL_REPO="https://github.com/kreshuklab/go-nuclear"
23
+
24
+ # Run the Python script with default parameters
25
+ python export.py \
26
+ --channels $DEFAULT_CHANNELS \
27
+ --path_pretrained_model "$DEFAULT_PATH_PRETRAINED_MODEL" \
28
+ --path_readme "$DEFAULT_PATH_README" \
29
+ --list_path_cover_images $DEFAULT_LIST_PATH_COVER_IMAGES \
30
+ --model_version "$DEFAULT_MODEL_VERSION" \
31
+ --model_name "$DEFAULT_MODEL_NAME" \
32
+ --model_documentation "$DEFAULT_MODEL_DOCUMENTATION" \
33
+ --model_authors "$DEFAULT_MODEL_AUTHORS" \
34
+ --model_cite "$DEFAULT_MODEL_CITE" \
35
+ --model_tags $DEFAULT_MODEL_TAGS \
36
+ --model_license "$DEFAULT_MODEL_LICENSE" \
37
+ --model_repo "$DEFAULT_MODEL_REPO"
38
+ ```
39
+ """
40
+
41
+ import os
42
+ import sys
43
+ import json
44
+ import argparse
45
+ from pathlib import Path
46
+ from urllib.parse import urlparse
47
+
48
+ import torch
49
+ import numpy as np
50
+
51
+ from cellpose.io import imread
52
+ from cellpose.utils import download_url_to_file
53
+ from cellpose.transforms import pad_image_ND, normalize_img, convert_image
54
+ from cellpose.vit_sam import CPnetBioImageIO
55
+
56
+ from bioimageio.spec.model.v0_5 import (
57
+ ArchitectureFromFileDescr,
58
+ Author,
59
+ AxisId,
60
+ ChannelAxis,
61
+ CiteEntry,
62
+ Doi,
63
+ FileDescr,
64
+ Identifier,
65
+ InputTensorDescr,
66
+ IntervalOrRatioDataDescr,
67
+ LicenseId,
68
+ ModelDescr,
69
+ ModelId,
70
+ OrcidId,
71
+ OutputTensorDescr,
72
+ ParameterizedSize,
73
+ PytorchStateDictWeightsDescr,
74
+ SizeReference,
75
+ SpaceInputAxis,
76
+ SpaceOutputAxis,
77
+ TensorId,
78
+ TorchscriptWeightsDescr,
79
+ Version,
80
+ WeightsDescr,
81
+ )
82
+ # Define ARBITRARY_SIZE if it is not available in the module
83
+ try:
84
+ from bioimageio.spec.model.v0_5 import ARBITRARY_SIZE
85
+ except ImportError:
86
+ ARBITRARY_SIZE = ParameterizedSize(min=1, step=1)
87
+
88
+ from bioimageio.spec.common import HttpUrl
89
+ from bioimageio.spec import save_bioimageio_package
90
+ from bioimageio.core import test_model
91
+
92
+ DEFAULT_CHANNELS = [2, 1]
93
+ DEFAULT_NORMALIZE_PARAMS = {
94
+ "axis": -1,
95
+ "lowhigh": None,
96
+ "percentile": None,
97
+ "normalize": True,
98
+ "norm3D": False,
99
+ "sharpen_radius": 0,
100
+ "smooth_radius": 0,
101
+ "tile_norm_blocksize": 0,
102
+ "tile_norm_smooth3D": 1,
103
+ "invert": False,
104
+ }
105
+ IMAGE_URL = "http://www.cellpose.org/static/data/rgb_3D.tif"
106
+
107
+
108
def download_and_normalize_image(path_dir_temp, channels=DEFAULT_CHANNELS):
    """
    Fetch the Cellpose sample image (IMAGE_URL) into path_dir_temp and prepare
    it for the network: convert channels, normalize intensities, move channels
    to axis 1, and pad the spatial dimensions.
    """
    filename = os.path.basename(urlparse(IMAGE_URL).path)
    path_image = path_dir_temp / filename
    # skip the download when the file is already cached locally
    if not path_image.exists():
        sys.stderr.write(f'Downloading: "{IMAGE_URL}" to {path_image}\n')
        download_url_to_file(IMAGE_URL, path_image)
    image = imread(path_image).astype(np.float32)
    image = convert_image(image, channels, channel_axis=1, z_axis=0, do_3D=False, nchan=2)
    image = normalize_img(image, **DEFAULT_NORMALIZE_PARAMS)
    # (z, y, x, c) -> (z, c, y, x)
    image = np.transpose(image, (0, 3, 1, 2))
    image, _, _ = pad_image_ND(image)
    return image
123
+
124
+
125
def load_bioimageio_cpnet_model(path_model_weight, nchan=2):
    """Build the BioImage.IO CPnet wrapper and load pretrained weights.

    Returns the model in eval mode together with the kwargs used to
    construct it (needed later for the architecture description).

    NOTE(review): *nchan* is accepted but never used here — confirm whether
    it should be forwarded to the network constructor.
    """
    kwargs = {
        "nout": 3,
    }
    model = CPnetBioImageIO(**kwargs)
    # Weights are loaded onto CPU so packaging works on machines without a GPU.
    state_dict = torch.load(path_model_weight, map_location=torch.device("cpu"), weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()  # crucial for the prediction results
    return model, kwargs
134
+
135
+
136
def descr_gen_input(path_test_input, nchan=2):
    """Build the BioImage.IO input-tensor description for tensor ``raw``.

    Axes: z (any size), channel (c1..c<nchan>), and y/x constrained to
    multiples of 16 (minimum 16). *path_test_input* points at the saved
    test tensor (.npy).
    """
    axes = [
        SpaceInputAxis(id=AxisId("z"), size=ARBITRARY_SIZE),
        ChannelAxis(channel_names=[Identifier(f"c{i+1}") for i in range(nchan)]),
        SpaceInputAxis(id=AxisId("y"), size=ParameterizedSize(min=16, step=16)),
        SpaceInputAxis(id=AxisId("x"), size=ParameterizedSize(min=16, step=16)),
    ]
    return InputTensorDescr(
        id=TensorId("raw"),
        axes=axes,
        test_tensor=FileDescr(source=Path(path_test_input)),
        data=IntervalOrRatioDataDescr(type="float32"),
    )
152
+
153
+
154
def descr_gen_output_flow(path_test_output):
    """Build the description of the main ``flow`` output tensor.

    Spatial axes mirror the sizes of the ``raw`` input tensor; the channel
    axis carries the three flow components.
    """

    def _same_as_raw(axis_name):
        # Size tied to the corresponding axis of the input tensor.
        return SizeReference(tensor_id=TensorId("raw"), axis_id=AxisId(axis_name))

    axes = [
        SpaceOutputAxis(id=AxisId("z"), size=_same_as_raw("z")),
        ChannelAxis(channel_names=[Identifier("flow1"), Identifier("flow2"), Identifier("flow3")]),
        SpaceOutputAxis(id=AxisId("y"), size=_same_as_raw("y")),
        SpaceOutputAxis(id=AxisId("x"), size=_same_as_raw("x")),
    ]
    return OutputTensorDescr(
        id=TensorId("flow"),
        axes=axes,
        test_tensor=FileDescr(source=Path(path_test_output)),
    )
168
+
169
+
170
def descr_gen_output_downsampled(path_dir_temp, nbase=None):
    """Describe the pyramid of downsampled feature tensors.

    One descriptor per entry of *nbase* (channel counts per level); level
    ``i`` has y/x scale ``2**i`` relative to the ``raw`` input and reads its
    test tensor from ``test_downsampled_<i>.npy`` in *path_dir_temp*.
    """
    if nbase is None:
        nbase = [32, 64, 128, 256]

    descriptors = []
    for level, n_feat in enumerate(nbase):
        axes = [
            SpaceOutputAxis(id=AxisId("z"), size=SizeReference(tensor_id=TensorId("raw"), axis_id=AxisId("z"))),
            ChannelAxis(channel_names=[Identifier(f"feature{i+1}") for i in range(n_feat)]),
            SpaceOutputAxis(
                id=AxisId("y"),
                size=SizeReference(tensor_id=TensorId("raw"), axis_id=AxisId("y")),
                scale=2**level,
            ),
            SpaceOutputAxis(
                id=AxisId("x"),
                size=SizeReference(tensor_id=TensorId("raw"), axis_id=AxisId("x")),
                scale=2**level,
            ),
        ]
        descriptors.append(
            OutputTensorDescr(
                id=TensorId(f"downsampled_{level}"),
                axes=axes,
                test_tensor=FileDescr(source=Path(path_dir_temp / f"test_downsampled_{level}.npy")),
            )
        )
    return descriptors
203
+
204
+
205
def descr_gen_output_style(path_test_style, nchannel=256):
    """Describe the ``style`` output tensor (z axis + *nchannel* features)."""
    axes = [
        SpaceOutputAxis(id=AxisId("z"), size=SizeReference(tensor_id=TensorId("raw"), axis_id=AxisId("z"))),
        ChannelAxis(channel_names=[Identifier(f"feature{i+1}") for i in range(nchannel)]),
    ]
    return OutputTensorDescr(
        id=TensorId("style"),
        axes=axes,
        test_tensor=FileDescr(source=Path(path_test_style)),
    )
217
+
218
+
219
def descr_gen_arch(cpnet_kwargs, path_cpnet_wrapper=None):
    """Describe the model architecture for the pytorch_state_dict weights.

    Points BioImage.IO at the ``CPnetBioImageIO`` callable inside the wrapper
    module (defaults to ``resnet_torch.py`` next to this file).
    """
    wrapper = Path(__file__).parent / "resnet_torch.py" if path_cpnet_wrapper is None else path_cpnet_wrapper
    return ArchitectureFromFileDescr(
        callable=Identifier("CPnetBioImageIO"),
        source=Path(wrapper),
        kwargs=cpnet_kwargs,
    )
228
+
229
+
230
def descr_gen_documentation(path_doc, markdown_text):
    """Write the model documentation markdown to *path_doc*.

    Uses an explicit UTF-8 encoding so the result does not depend on the
    platform's locale default (README text routinely contains non-ASCII,
    e.g. accented author names), and a context manager so the file handle
    is always closed.
    """
    with open(path_doc, "w", encoding="utf-8") as f:
        f.write(markdown_text)
233
+
234
+
235
def package_to_bioimageio(
    path_pretrained_model,
    path_save_trace,
    path_readme,
    list_path_cover_images,
    descr_input,
    descr_output,
    descr_output_downsampled_tensors,
    descr_output_style_tensor,
    pytorch_version,
    pytorch_architecture,
    model_id,
    model_icon,
    model_version,
    model_name,
    model_documentation,
    model_authors,
    model_cite,
    model_tags,
    model_license,
    model_repo,
):
    """Package model description to BioImage.IO format.

    Assembles a ``ModelDescr`` from the previously generated tensor
    descriptions, metadata dicts (*model_authors*, *model_cite*), and the
    two weight flavours (state dict + traced TorchScript).
    """
    author_descrs = []
    for author in model_authors:
        author_descrs.append(
            Author(
                name=author["name"],
                affiliation=author["affiliation"],
                github_user=author["github_user"],
                orcid=OrcidId(author["orcid"]),
            )
        )

    cite_descrs = []
    for cite in model_cite:
        cite_descrs.append(CiteEntry(text=cite["text"], doi=Doi(cite["doi"]), url=cite["url"]))

    weights_descr = WeightsDescr(
        pytorch_state_dict=PytorchStateDictWeightsDescr(
            source=Path(path_pretrained_model),
            architecture=pytorch_architecture,
            pytorch_version=pytorch_version,
        ),
        torchscript=TorchscriptWeightsDescr(
            source=Path(path_save_trace),
            pytorch_version=pytorch_version,
            parent="pytorch_state_dict",  # these weights were converted from the pytorch_state_dict weights.
        ),
    )

    return ModelDescr(
        id=None if model_id is None else ModelId(model_id),
        id_emoji=model_icon,
        version=Version(model_version),
        name=model_name,
        description=model_documentation,
        authors=author_descrs,
        cite=cite_descrs,
        covers=[Path(img) for img in list_path_cover_images],
        license=LicenseId(model_license),
        tags=model_tags,
        documentation=Path(path_readme),
        git_repo=HttpUrl(model_repo),
        inputs=[descr_input],
        outputs=[descr_output, descr_output_style_tensor] + descr_output_downsampled_tensors,
        weights=weights_descr,
    )
296
+
297
+
298
def parse_args(argv=None):
    """Parse command-line arguments for BioImage.IO model packaging.

    Args:
        argv: Optional list of argument strings. Defaults to ``None``, in
            which case argparse reads ``sys.argv[1:]`` as before; passing an
            explicit list makes the parser usable/testable programmatically.

    Returns:
        argparse.Namespace with the packaging configuration.
    """
    # fmt: off
    parser = argparse.ArgumentParser(description="BioImage.IO model packaging for Cellpose")
    parser.add_argument("--channels", nargs=2, default=[2, 1], type=int, help="Cyto-only = [2, 0], Cyto + Nuclei = [2, 1], Nuclei-only = [1, 0]")
    parser.add_argument("--path_pretrained_model", required=True, type=str, help="Path to pretrained model file, e.g., cellpose_residual_on_style_on_concatenation_off_1135_rest_2023_05_04_23_41_31.252995")
    parser.add_argument("--path_readme", required=True, type=str, help="Path to README file")
    parser.add_argument("--list_path_cover_images", nargs='+', required=True, type=str, help="List of paths to cover images")
    parser.add_argument("--model_id", type=str, help="Model ID, provide if already exists", default=None)
    parser.add_argument("--model_icon", type=str, help="Model icon, provide if already exists", default=None)
    parser.add_argument("--model_version", required=True, type=str, help="Model version, new model should be 0.1.0")
    parser.add_argument("--model_name", required=True, type=str, help="Model name, e.g., My Cool Cellpose")
    parser.add_argument("--model_documentation", required=True, type=str, help="Model documentation, e.g., A cool Cellpose model trained for my cool dataset.")
    parser.add_argument("--model_authors", required=True, type=str, help="Model authors in JSON format, e.g., '[{\"name\": \"Qin Yu\", \"affiliation\": \"EMBL\", \"github_user\": \"qin-yu\", \"orcid\": \"0000-0002-4652-0795\"}]'")
    parser.add_argument("--model_cite", required=True, type=str, help="Model citation in JSON format, e.g., '[{\"text\": \"For more details of the model itself, see the manuscript\", \"doi\": \"10.1242/dev.202800\", \"url\": null}]'")
    parser.add_argument("--model_tags", nargs='+', required=True, type=str, help="Model tags, e.g., cellpose 3d 2d")
    parser.add_argument("--model_license", required=True, type=str, help="Model license, e.g., MIT")
    parser.add_argument("--model_repo", required=True, type=str, help="Model repository URL")
    # fmt: on
    return parser.parse_args(argv)
317
+
318
+
319
def main():
    """End-to-end packaging pipeline: CLI args -> test tensors -> descriptions
    -> validation -> BioImage.IO zip.

    Side effects: creates a per-model working directory under ../models/,
    downloads the sample image, writes test .npy tensors, saves a traced
    TorchScript model, and writes the final package zip.
    """
    args = parse_args()

    # Parse user-provided paths and arguments
    channels = args.channels
    # Author/citation metadata arrive as JSON strings on the CLI.
    model_cite = json.loads(args.model_cite)
    model_authors = json.loads(args.model_authors)

    path_readme = Path(args.path_readme)
    path_pretrained_model = Path(args.path_pretrained_model)
    list_path_cover_images = [Path(path_image) for path_image in args.list_path_cover_images]

    # Auto-generated paths: working dir is named after the weight file's stem.
    path_cpnet_wrapper = Path(__file__).resolve().parent / "resnet_torch.py"
    path_dir_temp = Path(__file__).resolve().parent.parent / "models" / path_pretrained_model.stem
    path_dir_temp.mkdir(parents=True, exist_ok=True)

    path_save_trace = path_dir_temp / "cp_traced.pt"
    path_test_input = path_dir_temp / "test_input.npy"
    path_test_output = path_dir_temp / "test_output.npy"
    path_test_style = path_dir_temp / "test_style.npy"
    path_bioimageio_package = path_dir_temp / "cellpose_model.zip"

    # Download test input image and persist it as the package's test input.
    img_np = download_and_normalize_image(path_dir_temp, channels=channels)
    np.save(path_test_input, img_np)
    img = torch.tensor(img_np).float()

    # Load model (eval mode, CPU weights).
    cpnet_biio, cpnet_kwargs = load_bioimageio_cpnet_model(path_pretrained_model)

    # Test model and save output: element 0 is the flow tensor, element 1 the
    # style tensor, the remainder the downsampled feature tensors.
    tuple_output_tensor = cpnet_biio(img)
    np.save(path_test_output, tuple_output_tensor[0].detach().numpy())
    np.save(path_test_style, tuple_output_tensor[1].detach().numpy())
    for i, t in enumerate(tuple_output_tensor[2:]):
        np.save(path_dir_temp / f"test_downsampled_{i}.npy", t.detach().numpy())

    # Save traced model (the torchscript weight flavour of the package).
    model_traced = torch.jit.trace(cpnet_biio, img)
    model_traced.save(path_save_trace)

    # Generate model description. nbase[1:] / nbase[-1] supply the per-level
    # channel counts for the downsampled and style outputs.
    descr_input = descr_gen_input(path_test_input)
    descr_output = descr_gen_output_flow(path_test_output)
    descr_output_downsampled_tensors = descr_gen_output_downsampled(path_dir_temp, nbase=cpnet_biio.nbase[1:])
    descr_output_style_tensor = descr_gen_output_style(path_test_style, cpnet_biio.nbase[-1])
    pytorch_version = Version(torch.__version__)
    pytorch_architecture = descr_gen_arch(cpnet_kwargs, path_cpnet_wrapper)

    # Package model
    my_model_descr = package_to_bioimageio(
        path_pretrained_model,
        path_save_trace,
        path_readme,
        list_path_cover_images,
        descr_input,
        descr_output,
        descr_output_downsampled_tensors,
        descr_output_style_tensor,
        pytorch_version,
        pytorch_architecture,
        args.model_id,
        args.model_icon,
        args.model_version,
        args.model_name,
        args.model_documentation,
        model_authors,
        model_cite,
        args.model_tags,
        args.model_license,
        args.model_repo,
    )

    # Test model: validate both weight flavours before shipping.
    summary = test_model(my_model_descr, weight_format="pytorch_state_dict")
    summary.display()
    summary = test_model(my_model_descr, weight_format="torchscript")
    summary.display()

    # Save BioImage.IO package
    package_path = save_bioimageio_package(my_model_descr, output_path=Path(path_bioimageio_package))
    print("package path:", package_path)
402
+
403
+
404
# Script entry point: run the full packaging pipeline when invoked directly.
if __name__ == "__main__":
    main()