Image Segmentation
Transformers
PyTorch
pixdlm
cvpr-2026
compute-transparency
reasoning-segmentation
uav
remote-sensing
vision-language
Instructions to use WhynotHug/PixDLM with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use WhynotHug/PixDLM with Transformers:
  ```python
  # Use a pipeline as a high-level helper
  from transformers import pipeline

  pipe = pipeline("image-segmentation", model="WhynotHug/PixDLM")
  ```
  ```python
  # Load model directly
  from transformers import AutoModel

  model = AutoModel.from_pretrained("WhynotHug/PixDLM", dtype="auto")
  ```
- Notebooks
- Google Colab
- Kaggle
| from .clip_encoder import CLIPVisionTower | |
| from .multipath_encoder_wapper import MultiPathCLIPVisionTower | |
| import os | |
def build_vision_tower(vision_tower_cfg, **kwargs):
    """Construct the vision tower selected by ``vision_tower_cfg``.

    The tower identifier is read from ``vision_tower_cfg.mm_vision_tower``,
    falling back to ``vision_tower_cfg.vision_tower`` and finally ``None``.

    Args:
        vision_tower_cfg: Configuration object. The attributes
            ``mm_vision_tower``, ``vision_tower`` and
            ``is_multipath_encoder`` are read with ``getattr``; the object
            itself is forwarded to the tower constructor as ``args``.
        **kwargs: Forwarded unchanged to the tower constructor.

    Returns:
        A ``MultiPathCLIPVisionTower`` when ``is_multipath_encoder`` is
        truthy; otherwise a ``CLIPVisionTower`` when the identifier starts
        with ``"openai"`` or ``"laion"`` or contains ``"clip"``.

    Raises:
        ValueError: If the multipath encoder is not requested and the
            identifier is missing, is not a string, or matches none of the
            recognised patterns.
    """
    vision_tower = getattr(
        vision_tower_cfg,
        "mm_vision_tower",
        getattr(vision_tower_cfg, "vision_tower", None),
    )

    if getattr(vision_tower_cfg, "is_multipath_encoder", False):
        # Opt-in diagnostic output, enabled with PIXDLM_VERBOSE=1.
        if os.environ.get("PIXDLM_VERBOSE", "0") == "1":
            print("build MultiPathCLIPVisionTower")
        return MultiPathCLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)

    # A missing identifier (None) must fall through to the ValueError below
    # rather than fail with AttributeError on ``None.startswith``.
    if isinstance(vision_tower, str) and (
        vision_tower.startswith(("openai", "laion")) or "clip" in vision_tower
    ):
        return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)

    raise ValueError(f"Unknown vision tower: {vision_tower}")