Image Segmentation
Transformers
PyTorch
pixdlm
cvpr-2026
compute-transparency
reasoning-segmentation
uav
remote-sensing
vision-language
Instructions to use WhynotHug/PixDLM with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use WhynotHug/PixDLM with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-segmentation", model="WhynotHug/PixDLM")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("WhynotHug/PixDLM", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| from typing import Type | |
| import torch | |
| import torch.nn as nn | |
class MLPBlock(nn.Module):
    """Two-layer perceptron: linear expansion, activation, linear projection.

    The first linear layer maps ``embedding_dim`` features to ``mlp_dim``
    features; the second maps them back to ``embedding_dim``, so the size of
    the last dimension of the output equals that of the input.
    """

    def __init__(
        self,
        embedding_dim: int,
        mlp_dim: int,
        act: Type[nn.Module] = nn.GELU,
    ) -> None:
        """Build the two linear layers and instantiate the activation.

        Args:
            embedding_dim: Feature size of the block's input and output.
            mlp_dim: Feature size of the hidden representation.
            act: Activation module *class* (not an instance); it is called
                with no arguments to create the activation layer.
        """
        super().__init__()
        # Layers are created in lin1 -> lin2 -> act order so that parameter
        # initialisation and submodule registration order stay stable.
        self.lin1 = nn.Linear(in_features=embedding_dim, out_features=mlp_dim)
        self.lin2 = nn.Linear(in_features=mlp_dim, out_features=embedding_dim)
        self.act = act()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply ``lin1``, then the activation, then ``lin2`` to ``x``."""
        hidden = self.lin1(x)
        activated = self.act(hidden)
        return self.lin2(activated)
class LayerNorm2d(nn.Module):
    """Layer normalisation computed across dimension 1 of the input.

    Each position is normalised using the mean and (biased) variance taken
    over dimension 1, then scaled and shifted by learnable per-channel
    ``weight`` and ``bias`` vectors of length ``num_channels``.
    """

    def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
        """Create the affine parameters.

        Args:
            num_channels: Length of the ``weight`` and ``bias`` vectors.
            eps: Constant added to the variance before the square root to
                avoid division by zero.
        """
        super().__init__()
        # Initialised to the identity transform: scale of one, shift of zero.
        self.weight = nn.Parameter(torch.ones(num_channels))
        self.bias = nn.Parameter(torch.zeros(num_channels))
        self.eps = eps

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Normalise ``x`` over dimension 1 and apply the affine transform.

        NOTE(review): presumably ``x`` is laid out as (N, C, H, W); the
        ``[:, None, None]`` broadcast lines the parameters up with the
        third-from-last dimension -- confirm against callers.
        """
        channel_mean = x.mean(dim=1, keepdim=True)
        centered = x - channel_mean
        # Biased variance: the plain mean of squared deviations.
        variance = centered.pow(2).mean(dim=1, keepdim=True)
        normalized = centered / torch.sqrt(variance + self.eps)
        # View the (C,) parameters as (C, 1, 1) so they broadcast over the
        # two trailing dimensions.
        scale = self.weight[:, None, None]
        shift = self.bias[:, None, None]
        return scale * normalized + shift