BiliSakura committed on Mar 24

Commit

fabc606

0 Parent(s):

Duplicate from BiliSakura/SegEarth-OV

Browse files

Co-authored-by: Sakura <BiliSakura@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +13 -0
OV-2/config.json +12 -0
OV-2/configs/cls_chn6-cug.txt +2 -0
OV-2/configs/cls_ddhrnet_korea_sar.txt +5 -0
OV-2/configs/cls_ddhrnet_shandong_sar.txt +5 -0
OV-2/configs/cls_ddhrnet_xian_sar.txt +5 -0
OV-2/configs/cls_deepglobe.txt +7 -0
OV-2/configs/cls_fusar.txt +5 -0
OV-2/configs/cls_hrsid.txt +2 -0
OV-2/configs/cls_iSAID.txt +16 -0
OV-2/configs/cls_inria.txt +2 -0
OV-2/configs/cls_loveda.txt +7 -0
OV-2/configs/cls_massachusetts_building.txt +2 -0
OV-2/configs/cls_openearthmap.txt +9 -0
OV-2/configs/cls_openearthmap_sar.txt +9 -0
OV-2/configs/cls_pie_sar.txt +6 -0
OV-2/configs/cls_potsdam.txt +6 -0
OV-2/configs/cls_roadval.txt +2 -0
OV-2/configs/cls_uavid.txt +7 -0
OV-2/configs/cls_udd5.txt +5 -0
OV-2/configs/cls_vaihingen.txt +6 -0
OV-2/configs/cls_vdd.txt +7 -0
OV-2/configs/cls_wbs-si.txt +2 -0
OV-2/configs/cls_whu.txt +2 -0
OV-2/configs/cls_whu_sar.txt +7 -0
OV-2/configs/cls_xBD.txt +2 -0
OV-2/configs/cls_yeseg_sar.txt +6 -0
OV-2/pipeline.py +38 -0
OV-2/prompts/imagenet_template.py +97 -0
OV-2/upsamplers.py +251 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/README.md +170 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/cls_openearthmap_sar.txt +9 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/config.json +42 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_ddhrnet_xian_sar.txt +5 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_fusar.txt +5 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_hrsid.txt +2 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_openearthmap_sar.txt +9 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_pie_sar.txt +6 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_whu_sar.txt +7 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_yeseg_sar.txt +6 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/conversion_meta.json +12 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_YESeg-OPT-SAR/README.md +8 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_YESeg-OPT-SAR/cvt_label.png +0 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_YESeg-OPT-SAR/label.png +0 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_YESeg-OPT-SAR/rgb.png +0 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_YESeg-OPT-SAR/sar.png +0 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_result.png +3 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/eval.py +163 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/merges.txt +0 -0
OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/model.safetensors +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,13 @@

+OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/model.safetensors filter=lfs diff=lfs merge=lfs -text
+OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_result.png filter=lfs diff=lfs merge=lfs -text
+OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/simfeatup/xclip_jbu_one_million_aid.ckpt filter=lfs diff=lfs merge=lfs -text
+OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/simfeatup/clip_jbu_stack_cocostuff.ckpt filter=lfs diff=lfs merge=lfs -text
+OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/simfeatup/maskclip_jbu_stack_cocostuff.ckpt filter=lfs diff=lfs merge=lfs -text
+OV-2/weights/featup/xclip_jbu_one_million_aid.ckpt filter=lfs diff=lfs merge=lfs -text
+OV-2/weights/featup/clip_jbu_stack_cocostuff.ckpt filter=lfs diff=lfs merge=lfs -text
+OV-2/weights/featup/maskclip_jbu_stack_cocostuff.ckpt filter=lfs diff=lfs merge=lfs -text
+OV-3/weights/backbone/sam3/model.safetensors filter=lfs diff=lfs merge=lfs -text
+OV/weights/backbone/clip-vit-base-patch16/model.safetensors filter=lfs diff=lfs merge=lfs -text
+OV/weights/featup/xclip_jbu_one_million_aid.ckpt filter=lfs diff=lfs merge=lfs -text
+OV/weights/featup/clip_jbu_stack_cocostuff.ckpt filter=lfs diff=lfs merge=lfs -text
+OV/weights/featup/maskclip_jbu_stack_cocostuff.ckpt filter=lfs diff=lfs merge=lfs -text

OV-2/config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "version": "OV-2",
+  "backbone": "AlignEarth",
+  "model_id": "BiliSakura/AlignEarth-SAR-ViT-B-16",
+  "vit_type": "ViT-B/16",
+  "featup": "jbu_one",
+  "featup_weights": "weights/featup/xclip_jbu_one_million_aid.ckpt",
+  "cls_token_lambda": -0.3,
+  "logit_scale": 50.0,
+  "notes": "SAR-adapted CLIP via knowledge distillation. Also supports openai/clip-vit-base-patch16.",
+  "local_backbone": "weights/backbone/AlignEarth-SAR-ViT-B-16"
+}

OV-2/configs/cls_chn6-cug.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ background
2	+ road

OV-2/configs/cls_ddhrnet_korea_sar.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+building
+road
+greenery
+water
+farmland,grass

OV-2/configs/cls_ddhrnet_shandong_sar.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+farmland,grass
+greenery
+road
+building
+water

OV-2/configs/cls_ddhrnet_xian_sar.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+building
+road
+farmland,grass
+greenery
+water

OV-2/configs/cls_deepglobe.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+urban
+agriculture
+rangeland
+forest
+water
+barren
+background

OV-2/configs/cls_fusar.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+others
+water
+road
+building
+grass

OV-2/configs/cls_hrsid.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ background
2	+ ship,boat

OV-2/configs/cls_iSAID.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+background
+ship
+store tank
+baseball diamond
+tennis court
+basketball court
+ground track field
+bridge
+large vehicle
+small vehicle
+helicopter
+swimming pool
+roundabout
+soccer ball field
+plane
+harbor

OV-2/configs/cls_inria.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ background
2	+ building

OV-2/configs/cls_loveda.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+background
+building,roof,house
+road
+water
+barren
+forest
+agricultural

OV-2/configs/cls_massachusetts_building.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ background
2	+ building

OV-2/configs/cls_openearthmap.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+background
+bareland,barren
+grass
+pavement
+road
+tree,forest
+water,river
+cropland
+building,roof,house

OV-2/configs/cls_openearthmap_sar.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+background
+bareland,barren
+grass
+pavement
+road
+tree,forest
+water,river
+cropland
+building,roof,house

OV-2/configs/cls_pie_sar.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+background
+city
+road
+water,river
+forest
+cropland

OV-2/configs/cls_potsdam.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+road
+building
+grass
+tree
+car
+clutter,background

OV-2/configs/cls_roadval.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ background
2	+ road

OV-2/configs/cls_uavid.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+background
+building
+road
+car
+tree
+vegetation
+human

OV-2/configs/cls_udd5.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+vegetation
+building
+road
+vehicle
+background

OV-2/configs/cls_vaihingen.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+road
+building
+grass
+tree
+car
+clutter

OV-2/configs/cls_vdd.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+background
+facade
+road
+vegetation
+vehicle
+roof
+water

OV-2/configs/cls_wbs-si.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ background
2	+ water

OV-2/configs/cls_whu.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ background
2	+ building

OV-2/configs/cls_whu_sar.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+farmland
+city
+village
+water
+forest
+road
+others

OV-2/configs/cls_xBD.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ background
2	+ building

OV-2/configs/cls_yeseg_sar.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+bareground,barren
+grass,farmland
+dense tree cover
+city
+water
+roadway

OV-2/pipeline.py ADDED Viewed

	@@ -0,0 +1,38 @@

+"""
+SegEarth OV-2 pipeline. Self-contained; uses config.json and weights/featup/ in this folder.
+"""
+import json
+from pathlib import Path
+import sys
+_parent = Path(__file__).resolve().parent.parent
+if str(_parent) not in sys.path:
+    sys.path.insert(0, str(_parent))
+from pipeline import SegEarthPipelineCLIP
+def load(config_path: Path = None, model_id: str = None, **kwargs):
+    """Build the OV-2 pipeline from the JSON config stored in this folder.
+
+    Backbone precedence: an explicit ``model_id`` argument, then the bundled
+    ``local_backbone`` directory named in the config (only if it exists on
+    disk), then the config's ``model_id`` entry. Previously the explicit
+    argument was ignored whenever the local backbone directory was present.
+
+    Args:
+        config_path: Optional path to a JSON config; defaults to
+            ``config.json`` next to this file.
+        model_id: Optional backbone identifier or path that overrides the
+            config-derived choice.
+        **kwargs: Forwarded to ``SegEarthPipelineCLIP``. Values supplied here
+            take priority over the config-derived defaults set below.
+
+    Returns:
+        Whatever ``SegEarthPipelineCLIP(**kwargs)`` returns.
+
+    Raises:
+        KeyError: If no backbone can be resolved and the config has no
+            ``model_id`` entry.
+    """
+    repo_dir = Path(__file__).parent
+    cfg_path = config_path or repo_dir / "config.json"
+    with open(cfg_path, encoding="utf-8") as f:
+        cfg = json.load(f)
+    if not model_id:
+        # No explicit override: prefer the bundled weights, else the config id.
+        local_backbone = cfg.get("local_backbone")
+        local_path = repo_dir / local_backbone if local_backbone else None
+        if local_path is not None and local_path.exists():
+            model_id = str(local_path)
+        else:
+            model_id = cfg["model_id"]
+    kwargs["model_id"] = model_id
+    kwargs.setdefault("featup_model", cfg.get("featup") or "jbu_one")
+    kwargs.setdefault("cls_token_lambda", cfg.get("cls_token_lambda", -0.3))
+    kwargs.setdefault("logit_scale", cfg.get("logit_scale", 50.0))
+    # Only the file name of the configured checkpoint is used; it is looked up
+    # under this folder's weights/featup/ directory.
+    featup_name = (cfg.get("featup_weights") or "xclip_jbu_one_million_aid.ckpt").split("/")[-1]
+    local_featup = repo_dir / "weights" / "featup" / featup_name
+    if local_featup.exists():
+        kwargs.setdefault("featup_weights_path", local_featup)
+    kwargs.setdefault("class_names_path", repo_dir / "configs" / "cls_openearthmap_sar.txt")
+    return SegEarthPipelineCLIP(**kwargs)
+# Alias so callers can write ``SegEarthPipeline(...)``.
+SegEarthPipeline = load

OV-2/prompts/imagenet_template.py ADDED Viewed

	@@ -0,0 +1,97 @@

+"""
+Text prompts for open-vocabulary segmentation.
+From SegEarth-OV-2/prompts/imagenet_template.py
+"""
+# Prompt templates: each entry is a callable that takes a class name ``c``
+# and returns one sentence with the name substituted in.
+openai_imagenet_template = [
+    lambda c: f"a bad photo of a {c}.",
+    lambda c: f"a photo of many {c}.",
+    lambda c: f"a sculpture of a {c}.",
+    lambda c: f"a photo of the hard to see {c}.",
+    lambda c: f"a low resolution photo of the {c}.",
+    lambda c: f"a rendering of a {c}.",
+    lambda c: f"graffiti of a {c}.",
+    lambda c: f"a bad photo of the {c}.",
+    lambda c: f"a cropped photo of the {c}.",
+    lambda c: f"a tattoo of a {c}.",
+    lambda c: f"the embroidered {c}.",
+    lambda c: f"a photo of a hard to see {c}.",
+    lambda c: f"a bright photo of a {c}.",
+    lambda c: f"a photo of a clean {c}.",
+    lambda c: f"a photo of a dirty {c}.",
+    lambda c: f"a dark photo of the {c}.",
+    lambda c: f"a drawing of a {c}.",
+    lambda c: f"a photo of my {c}.",
+    lambda c: f"the plastic {c}.",
+    lambda c: f"a photo of the cool {c}.",
+    lambda c: f"a close-up photo of a {c}.",
+    lambda c: f"a black and white photo of the {c}.",
+    lambda c: f"a painting of the {c}.",
+    lambda c: f"a painting of a {c}.",
+    lambda c: f"a pixelated photo of the {c}.",
+    lambda c: f"a sculpture of the {c}.",
+    lambda c: f"a bright photo of the {c}.",
+    lambda c: f"a cropped photo of a {c}.",
+    lambda c: f"a plastic {c}.",
+    lambda c: f"a photo of the dirty {c}.",
+    lambda c: f"a jpeg corrupted photo of a {c}.",
+    lambda c: f"a blurry photo of the {c}.",
+    lambda c: f"a photo of the {c}.",
+    lambda c: f"a good photo of the {c}.",
+    lambda c: f"a rendering of the {c}.",
+    lambda c: f"a {c} in a video game.",
+    lambda c: f"a photo of one {c}.",
+    lambda c: f"a doodle of a {c}.",
+    lambda c: f"a close-up photo of the {c}.",
+    lambda c: f"a photo of a {c}.",
+    lambda c: f"the origami {c}.",
+    lambda c: f"the {c} in a video game.",
+    lambda c: f"a sketch of a {c}.",
+    lambda c: f"a doodle of the {c}.",
+    lambda c: f"a origami {c}.",
+    lambda c: f"a low resolution photo of a {c}.",
+    lambda c: f"the toy {c}.",
+    lambda c: f"a rendition of the {c}.",
+    lambda c: f"a photo of the clean {c}.",
+    lambda c: f"a photo of a large {c}.",
+    lambda c: f"a rendition of a {c}.",
+    lambda c: f"a photo of a nice {c}.",
+    lambda c: f"a photo of a weird {c}.",
+    lambda c: f"a blurry photo of a {c}.",
+    lambda c: f"a cartoon {c}.",
+    lambda c: f"art of a {c}.",
+    lambda c: f"a sketch of the {c}.",
+    lambda c: f"a embroidered {c}.",
+    lambda c: f"a pixelated photo of a {c}.",
+    lambda c: f"itap of the {c}.",
+    lambda c: f"a jpeg corrupted photo of the {c}.",
+    lambda c: f"a good photo of a {c}.",
+    lambda c: f"a plushie {c}.",
+    lambda c: f"a photo of the nice {c}.",
+    lambda c: f"a photo of the small {c}.",
+    lambda c: f"a photo of the weird {c}.",
+    lambda c: f"the cartoon {c}.",
+    lambda c: f"art of the {c}.",
+    lambda c: f"a drawing of the {c}.",
+    lambda c: f"a photo of the large {c}.",
+    lambda c: f"a black and white photo of a {c}.",
+    lambda c: f"the plushie {c}.",
+    lambda c: f"a dark photo of a {c}.",
+    lambda c: f"itap of a {c}.",
+    lambda c: f"graffiti of the {c}.",
+    lambda c: f"a toy {c}.",
+    lambda c: f"itap of my {c}.",
+    lambda c: f"a photo of a cool {c}.",
+    lambda c: f"a photo of a small {c}.",
+    lambda c: f"a tattoo of the {c}.",
+]
+# Shorter list of seven prompt callables; every sentence here also appears in
+# openai_imagenet_template.
+sub_imagenet_template = [
+    lambda c: f"itap of a {c}.",
+    lambda c: f"a bad photo of a {c}.",
+    lambda c: f"a origami {c}.",
+    lambda c: f"a photo of the large {c}.",
+    lambda c: f"a {c} in a video game.",
+    lambda c: f"art of the {c}.",
+    lambda c: f"a photo of the small {c}.",
+]

OV-2/upsamplers.py ADDED Viewed

	@@ -0,0 +1,251 @@

+"""
+SimFeatUp upsamplers for dense feature restoration.
+From SegEarth-OV/OV-2 simfeatup_dev. Used by CLIP-based variants (OV, OV-2).
+"""
+import math
+from typing import Optional
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+try:
+    from featup.adaptive_conv_cuda.adaptive_conv import AdaptiveConv
+except Exception:
+    AdaptiveConv = None
+def adaptive_conv_py_simple(input, filters):
+    """Apply a separate square filter at every output location (pure PyTorch).
+
+    Used as the fallback when the ``featup`` CUDA ``AdaptiveConv`` op could
+    not be imported.
+
+    Args:
+        input: 4-D tensor ``(b, c, h1, w1)``; it is unfolded with no padding,
+            so it must already be large enough for the kernel.
+        filters: 5-D tensor ``(b, h2, w2, f, f)`` holding one ``f x f`` filter
+            per batch element and output position.
+
+    Returns:
+        Tensor of shape ``(b, c, h2, w2)``.
+    """
+    _, channels, _, _ = input.shape
+    batch, out_h, out_w, k_h, k_w = filters.shape
+    assert k_h == k_w
+    taps = k_h * k_w
+    flat_filters = filters.reshape(batch, out_h, out_w, taps)
+    # One column of `taps` values per output position and channel.
+    windows = F.unfold(input, k_h).view((batch, channels, taps, out_h, out_w))
+    return torch.einsum("bhwf,bcfhw->bchw", flat_filters, windows)
+def _meshgrid(device, diameter):
+    """Return a ``(2, diameter, diameter)`` grid of offsets spanning [-1, 1] on both axes."""
+    axis = torch.linspace(-1, 1, diameter, device=device)
+    grid_x, grid_y = torch.meshgrid(axis, axis, indexing="ij")
+    return torch.stack([grid_x, grid_y], dim=0)
+class Bilinear(torch.nn.Module):
+    """Parameter-free upsampler: bilinear resize of ``source`` to the guidance size."""
+    def forward(self, source, guidance):
+        """Resize ``source`` to the spatial size of the 4-D ``guidance`` tensor."""
+        _, _, out_h, out_w = guidance.shape
+        target_size = (out_h, out_w)
+        return F.interpolate(source, target_size, mode="bilinear")
+class LayeredResizeConv(torch.nn.Module):
+    """Four stacked 2x bilinear-resize + residual-convolution stages (16x overall)."""
+    def __init__(self, dim, kernel_size=1, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Each stage sees the features concatenated with 3 guidance channels.
+        in_channels = dim + 3
+        for stage in (1, 2, 3, 4):
+            setattr(self, f"conv{stage}", nn.Conv2d(in_channels, dim, kernel_size, padding="same"))
+    def apply_conv(self, source, guidance, conv, activation):
+        """Double ``source`` bilinearly and add a guidance-conditioned residual."""
+        upsampled = F.interpolate(source, scale_factor=2, mode="bilinear")
+        _, _, up_h, up_w = upsampled.shape
+        # Bring the guidance to the same resolution as the doubled features.
+        resized_guidance = F.interpolate(guidance, (up_h, up_w), mode="bilinear")
+        stacked = torch.cat([upsampled, resized_guidance], dim=1)
+        residual = activation(conv(stacked))
+        return upsampled + residual
+    def forward(self, source, guidance):
+        """Run the four stages; the last one applies no activation."""
+        stages = (
+            (self.conv1, F.relu),
+            (self.conv2, F.relu),
+            (self.conv3, F.relu),
+            (self.conv4, lambda x: x),
+        )
+        features = source
+        for conv, activation in stages:
+            features = self.apply_conv(features, guidance, conv, activation)
+        return features
+class SimpleImplicitFeaturizer(torch.nn.Module):
+    """Prepend sin/cos features of a normalized pixel grid to a feature map."""
+    def __init__(self, n_freqs=20):
+        super().__init__()
+        self.n_freqs = n_freqs
+        # Two grid axes (row, column) are encoded per frequency.
+        self.dim_multiplier = 2
+    def forward(self, x):
+        """Return ``cat([sin, cos, x], dim=1)`` for a 4-D input ``x``.
+
+        The sin/cos terms are computed from a [-1, 1] grid that matches the
+        spatial size of ``x``, scaled by ``n_freqs`` frequencies, so the
+        output has ``4 * n_freqs`` more channels than ``x``.
+        """
+        batch, _, height, width = x.shape
+        out_dtype = x.dtype
+        rows = torch.linspace(-1, 1, height, device=x.device, dtype=out_dtype)
+        cols = torch.linspace(-1, 1, width, device=x.device, dtype=out_dtype)
+        grid = torch.stack(torch.meshgrid(rows, cols, indexing="ij")).unsqueeze(0)
+        grid = grid.broadcast_to((batch, grid.shape[1], height, width))
+        # Frequencies are exp of evenly spaced exponents in [-2, 10].
+        exponents = torch.linspace(-2, 10, self.n_freqs, device=x.device)
+        freqs = torch.exp(exponents).to(out_dtype).reshape(1, self.n_freqs, 1, 1, 1)
+        scaled = grid.unsqueeze(1) * freqs
+        scaled = scaled.reshape(batch, self.n_freqs * self.dim_multiplier, height, width)
+        return torch.cat([torch.sin(scaled), torch.cos(scaled), x], dim=1)
+class IFA(torch.nn.Module):
+    """Upsampler that repeatedly doubles resolution with a coordinate-conditioned MLP.
+
+    Each 2x step nearest-upsamples the features, encodes the offset between
+    every high-resolution coordinate and its low-resolution source coordinate
+    with ``SimpleImplicitFeaturizer``, and passes both through a 1x1-conv MLP.
+
+    Args:
+        feat_dim: Number of feature channels in and out.
+        num_scales: Number of frequencies used by the coordinate featurizer;
+            it also fixes the MLP input width.
+    """
+    def __init__(self, feat_dim, num_scales=20):
+        super().__init__()
+        self.feat_dim = feat_dim
+        # Pass num_scales through so the featurizer output width always matches
+        # the first conv below (the default of 20 behaves as before).
+        self.sin_feats = SimpleImplicitFeaturizer(num_scales)
+        self.mlp = nn.Sequential(
+            # 2 coordinate channels + sin and cos of 2 axes per frequency.
+            nn.Conv2d(feat_dim + (num_scales * 4) + 2, feat_dim, 1),
+            nn.BatchNorm2d(feat_dim),
+            nn.LeakyReLU(),
+            nn.Conv2d(feat_dim, feat_dim, 1),
+        )
+    def _upsample_2x(self, source):
+        """Double the spatial size of a 4-D ``source`` tensor."""
+        b, c, h, w = source.shape
+        dtype = source.dtype
+        up_source = F.interpolate(source, (h * 2, w * 2), mode="nearest")
+        # Rows and columns get their own coordinate vectors so that non-square
+        # inputs produce matching (2h, 2w) grids; for h == w this is identical
+        # to building both axes from h.
+        lr_rows = torch.linspace(0, h, steps=h, device=source.device, dtype=dtype)
+        lr_cols = torch.linspace(0, w, steps=w, device=source.device, dtype=dtype)
+        hr_rows = torch.linspace(0, h, steps=2 * h, device=source.device, dtype=dtype)
+        hr_cols = torch.linspace(0, w, steps=2 * w, device=source.device, dtype=dtype)
+        lr_coords = torch.stack(torch.meshgrid(lr_rows, lr_cols, indexing="ij")).unsqueeze(0)
+        hr_coords = torch.stack(torch.meshgrid(hr_rows, hr_cols, indexing="ij")).unsqueeze(0)
+        up_lr_coords = F.interpolate(lr_coords, (h * 2, w * 2), mode="nearest")
+        coord_diff = up_lr_coords - hr_coords
+        coord_diff_feats = self.sin_feats(coord_diff).to(dtype)
+        bcast_coord_feats = coord_diff_feats.broadcast_to((b, coord_diff_feats.shape[1], h * 2, w * 2))
+        return self.mlp(torch.cat([up_source, bcast_coord_feats], dim=1))
+    def forward(self, source, guidance):
+        """Upsample ``source`` to the spatial size of ``guidance``.
+
+        Doubles until both dimensions reach the target, then bilinearly
+        resizes if the doubling overshot. ``guidance`` is only read for its
+        shape.
+        """
+        _, _, gh, gw = guidance.shape
+        x = source
+        while x.shape[2] < gh or x.shape[3] < gw:
+            x = self._upsample_2x(x)
+        if x.shape[2] != gh or x.shape[3] != gw:
+            x = F.interpolate(x, (gh, gw), mode="bilinear")
+        return x
+class JBULearnedRange(torch.nn.Module):
+    """Guided upsampler that filters features with a per-pixel learned kernel.
+
+    The kernel is the product of a range term (softmax over similarities of
+    projected guidance in a ``diameter x diameter`` window) and a Gaussian
+    spatial term, renormalized and then adjusted by a small learned correction.
+    """
+    def __init__(self, guidance_dim, feat_dim, key_dim, scale=2, radius=3):
+        super().__init__()
+        self.scale = scale
+        self.radius = radius
+        self.diameter = self.radius * 2 + 1
+        self.guidance_dim = guidance_dim
+        self.key_dim = key_dim
+        self.feat_dim = feat_dim
+        n_taps = self.diameter ** 2
+        # Log-temperature of the range softmax (exponentiated before use).
+        self.range_temp = nn.Parameter(torch.tensor(0.0))
+        self.range_proj = nn.Sequential(
+            nn.Conv2d(guidance_dim, key_dim, 1, 1),
+            nn.GELU(),
+            nn.Dropout2d(0.1),
+            nn.Conv2d(key_dim, key_dim, 1, 1),
+        )
+        self.fixup_proj = nn.Sequential(
+            nn.Conv2d(guidance_dim + n_taps, n_taps, 1, 1),
+            nn.GELU(),
+            nn.Dropout2d(0.1),
+            nn.Conv2d(n_taps, n_taps, 1, 1),
+        )
+        self.sigma_spatial = nn.Parameter(torch.tensor(1.0))
+    def get_range_kernel(self, x):
+        """Return softmax weights ``(B, diameter**2, H, W)`` from guidance ``x``."""
+        batch, _, height, width = x.shape
+        keys = self.range_proj(x)
+        padded_keys = F.pad(keys, pad=[self.radius] * 4, mode="reflect")
+        # Gather every pixel's neighbourhood of projected guidance vectors.
+        neighbours = F.unfold(padded_keys, self.diameter)
+        neighbours = neighbours.reshape((batch, self.key_dim, self.diameter * self.diameter, height, width))
+        neighbours = neighbours.permute(0, 1, 3, 4, 2)
+        temperature = self.range_temp.exp().clamp_min(1e-4).clamp_max(1e4)
+        similarity = torch.einsum("bchwp,bchw->bphw", neighbours, keys)
+        return F.softmax(temperature * similarity, dim=1)
+    def get_spatial_kernel(self, device):
+        """Return the Gaussian spatial weights, shaped ``(1, diameter**2, 1, 1)``."""
+        offsets = _meshgrid(device, self.diameter)
+        sq_dist = offsets.square().sum(0)
+        weights = torch.exp(-sq_dist / (2 * self.sigma_spatial ** 2))
+        return weights.reshape(1, self.diameter * self.diameter, 1, 1)
+    def forward(self, source, guidance):
+        """Upsample ``source`` to the spatial size of ``guidance``.
+
+        Both inputs must be 4-D with equal batch size. The result has the
+        channels of ``source`` and the height/width of ``guidance``.
+        """
+        g_batch, _, g_height, g_width = guidance.shape
+        s_batch, _, _, _ = source.shape
+        assert s_batch == g_batch
+        work_dtype = source.dtype
+        guidance = guidance.to(work_dtype)
+        spatial = self.get_spatial_kernel(source.device).to(work_dtype)
+        learned_range = self.get_range_kernel(guidance).to(work_dtype)
+        kernel = (learned_range * spatial).to(work_dtype)
+        # Renormalize so the taps at each pixel sum to one.
+        kernel /= kernel.sum(1, keepdim=True).clamp(1e-7)
+        # Small learned correction computed from the kernel and the guidance.
+        kernel += 0.1 * self.fixup_proj(torch.cat([kernel, guidance], dim=1))
+        kernel = kernel.permute(0, 2, 3, 1).reshape(
+            g_batch, g_height, g_width, self.diameter, self.diameter
+        )
+        upsampled = F.interpolate(source, size=(g_height, g_width), mode="bicubic", align_corners=False)
+        upsampled_padded = F.pad(upsampled, pad=[self.radius] * 4, mode="reflect")
+        kernel = kernel.to(upsampled_padded.dtype)
+        # Use the featup CUDA op when it imported, else the PyTorch fallback.
+        if AdaptiveConv is None:
+            return adaptive_conv_py_simple(upsampled_padded, kernel)
+        return AdaptiveConv.apply(upsampled_padded, kernel)
+class JBUStack(torch.nn.Module):
+    """Four independently parameterized ``JBULearnedRange`` 2x stages (16x overall)."""
+    def __init__(self, feat_dim, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Guidance has 3 channels; each stage uses 32 key channels, radius 3.
+        for stage in (1, 2, 3, 4):
+            setattr(self, f"up{stage}", JBULearnedRange(3, feat_dim, 32, radius=3))
+        self.fixup_proj = nn.Sequential(
+            nn.Dropout2d(0.2),
+            nn.Conv2d(feat_dim, feat_dim, kernel_size=1),
+        )
+    def upsample(self, source, guidance, up):
+        """Apply ``up`` with guidance average-pooled to twice the source size."""
+        _, _, src_h, src_w = source.shape
+        pooled_guidance = F.adaptive_avg_pool2d(guidance, (src_h * 2, src_w * 2))
+        return up(source, pooled_guidance)
+    def forward(self, source, guidance):
+        """Run the four stages in order, then add a scaled 1x1-conv residual."""
+        features = source
+        for stage in (self.up1, self.up2, self.up3, self.up4):
+            features = self.upsample(features, guidance, stage)
+        return self.fixup_proj(features) * 0.1 + features
+class JBUOne(torch.nn.Module):
+    """A single ``JBULearnedRange`` (radius 5) applied four times, sharing weights."""
+    def __init__(self, feat_dim, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.up = JBULearnedRange(3, feat_dim, 32, radius=5)
+        self.fixup_proj = nn.Sequential(
+            nn.Dropout2d(0.2),
+            nn.Conv2d(feat_dim, feat_dim, kernel_size=1),
+        )
+    def upsample(self, source, guidance, up):
+        """Apply ``up`` with guidance average-pooled to twice the source size."""
+        _, _, src_h, src_w = source.shape
+        pooled_guidance = F.adaptive_avg_pool2d(guidance, (src_h * 2, src_w * 2))
+        return up(source, pooled_guidance)
+    def forward(self, source, guidance):
+        """Apply the shared 2x stage four times, then add a scaled 1x1-conv residual."""
+        features = source
+        for _ in range(4):
+            features = self.upsample(features, guidance, self.up)
+        return self.fixup_proj(features) * 0.1 + features
+# Relative checkpoint path for each upsampler name that has pretrained weights.
+FEATUP_CHECKPOINTS = {
+    "jbu_one": "simfeatup/xclip_jbu_one_million_aid.ckpt",
+    "jbu_stack": "simfeatup/clip_jbu_stack_cocostuff.ckpt",
+    "jbu_stack_maskclip": "simfeatup/maskclip_jbu_stack_cocostuff.ckpt",
+}
+def get_upsampler(name: str, feat_dim: int):
+    """Construct the upsampler module registered under ``name``.
+
+    Args:
+        name: One of ``bilinear``, ``jbu_one``, ``jbu_stack``,
+            ``jbu_stack_maskclip``, ``resize_conv`` or ``ifa``.
+        feat_dim: Feature channel count passed to the learned upsamplers
+            (unused by ``bilinear``).
+
+    Returns:
+        A freshly constructed ``torch.nn.Module``; no weights are loaded here.
+
+    Raises:
+        ValueError: If ``name`` is not a known upsampler.
+    """
+    # Builders are lazy so only the requested module is instantiated.
+    builders = {
+        "bilinear": lambda: Bilinear(),
+        "jbu_one": lambda: JBUOne(feat_dim),
+        "jbu_stack": lambda: JBUStack(feat_dim),
+        # Has an entry in FEATUP_CHECKPOINTS but was previously rejected here.
+        # NOTE(review): assumes the maskclip checkpoint uses the JBUStack
+        # architecture, as its file name suggests - confirm against the weights.
+        "jbu_stack_maskclip": lambda: JBUStack(feat_dim),
+        "resize_conv": lambda: LayeredResizeConv(feat_dim, 1),
+        "ifa": lambda: IFA(feat_dim),
+    }
+    if name not in builders:
+        raise ValueError(f"Unknown upsampler: {name}. Use: {', '.join(builders)}")
+    return builders[name]()

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/README.md ADDED Viewed

	@@ -0,0 +1,170 @@

+---
+license: mit
+pipeline_tag: image-segmentation
+tags:
+  - clip
+  - vision
+  - remote-sensing
+  - sar
+  - earth-observation
+  - open-vocabulary
+library_name: transformers
+---
+# AlignEarth-SAR-ViT-B-16
+CLIP-style vision-language model adapted for **Synthetic Aperture Radar (SAR)** imagery via knowledge distillation from optical VLMs. Enables open-vocabulary semantic segmentation for SAR remote sensing without building SAR foundation models from scratch.
+This repository provides the model in **Hugging Face Transformers** format, converted from the original [OpenCLIP-style checkpoint](https://huggingface.co/likyoo/AlignEarth-SAR-ViT-B-16) released by the SegEarth-OV-2 authors.
+## Model Details
+- **Architecture**: CLIP (ViT-B/16 vision encoder + text encoder)
+- **Vision**: 12-layer ViT, 768 hidden, 16×16 patches, 224×224 input
+- **Text**: 12-layer transformer, 512 hidden, vocab 49408, max length 77
+- **Projection**: 512-dim shared embedding space
+- **Source**: [likyoo/AlignEarth-SAR-ViT-B-16](https://huggingface.co/likyoo/AlignEarth-SAR-ViT-B-16) (OpenCLIP format)
+- **Conversion**: Mapped to `transformers.CLIPModel` for standard HF usage
+- **SimFeatUp**: Full upsampler suite from [SegEarth-OV/simfeatup_dev](https://github.com/likyoo/SegEarth-OV/tree/main/simfeatup_dev):
+  - `jbu_one` → `simfeatup/xclip_jbu_one_million_aid.ckpt` (default, remote-sensing)
+  - `jbu_stack` → `simfeatup/clip_jbu_stack_cocostuff.ckpt`
+  - `jbu_stack_maskclip` → `simfeatup/maskclip_jbu_stack_cocostuff.ckpt`
+  - `bilinear`, `resize_conv`, `ifa` (no pretrained weights)
+## Usage
+```python
+from transformers import CLIPModel, CLIPProcessor
+from PIL import Image
+model = CLIPModel.from_pretrained("BiliSakura/AlignEarth-SAR-ViT-B-16")
+processor = CLIPProcessor.from_pretrained("BiliSakura/AlignEarth-SAR-ViT-B-16")
+image = Image.open("sar_image.tif")
+texts = ["building", "road", "water body", "vegetation"]
+inputs = processor(
+    text=texts,
+    images=image,
+    return_tensors="pt",
+    padding=True,
+)
+outputs = model(**inputs)
+logits_per_image = outputs.logits_per_image
+probs = logits_per_image.softmax(dim=1)
+```
+For dense features (e.g., segmentation with SegEarth-OV-2), use the vision encoder:
+```python
+from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
+vision_model = CLIPVisionModelWithProjection.from_pretrained("BiliSakura/AlignEarth-SAR-ViT-B-16")
+processor = CLIPImageProcessor.from_pretrained("BiliSakura/AlignEarth-SAR-ViT-B-16")
+inputs = processor(images=image, return_tensors="pt")
+outputs = vision_model(**inputs)
+image_embeds = outputs.image_embeds  # pooled
+# Or use vision_model.vision_model for patch-level features
+```
+## Full Pipeline (SegEarth-OV-2 Style)
+For **open-vocabulary SAR segmentation** with SimFeatUp dense upsampling:
+```python
+from pathlib import Path
+from PIL import Image
+from pipeline import SegEarthPipeline
+pipe = SegEarthPipeline(Path("BiliSakura/AlignEarth-SAR-ViT-B-16"))
+image = Image.open("your_sar_image.tif").convert("RGB")
+seg_map = pipe(image)  # [H, W] class indices
+```
+The pipeline combines:
+- **AlignEarth** CLIP encoder (SAR-adapted)
+- **SimFeatUp** upsampler (choose `jbu_one`, `jbu_stack`, `jbu_stack_maskclip`, or `bilinear`)
+- **Global Bias Alleviation** (cls_token_lambda) – subtracts global context from patch features
+- **Logit scaling** and **prob threshold** for robust predictions
+- **Sliding window** for large images
+- OpenEarthMap SAR class names (customize via `cls_openearthmap_sar.txt` or `configs/cls_*.txt`)
+```python
+# Use different featup models
+pipe = SegEarthPipeline(Path("."), featup_model="jbu_one")       # default
+pipe = SegEarthPipeline(Path("."), featup_model="jbu_stack")
+pipe = SegEarthPipeline(Path("."), featup_model="jbu_stack_maskclip")
+pipe = SegEarthPipeline(Path("."), featup_model="bilinear")       # no weights
+# Full SegEarth-OV-2 options
+pipe = SegEarthPipeline(Path("."), cls_token_lambda=-0.3, logit_scale=50, prob_thd=0)
+pipe = SegEarthPipeline(Path("."), slide_crop=224, slide_stride=112)  # sliding window for large images
+pipe = SegEarthPipeline(Path("."), class_names_path="configs/cls_whu_sar.txt")  # different dataset
+```
+## Demo / Test
+A paired demo sample from **YESeg-OPT-SAR** is in `demo_YESeg-OPT-SAR/`: `sar.png`, `rgb.png`, `label.png`. **Note**: This model targets SAR imagery, not optical.
+```bash
+python test_demo.py                         # uses demo_YESeg-OPT-SAR, cls_yeseg_sar, prob_thd=0.3
+python test_demo.py --featup jbu_stack     # try jbu_stack upsampler
+python test_demo.py --save out.png          # save figure
+```
+The script displays a matplotlib image grid: RGB | SAR | Label (GT) | Prediction.
+## Evaluation
+Standalone evaluation (no mmseg) on image/label pairs:
+```bash
+python eval.py --img-dir data/OpenEarthMap_SAR/test/sar_images \
+               --label-dir data/OpenEarthMap_SAR/test/labels \
+               --config configs/cls_openearthmap_sar.txt
+```
+SAR class configs in `configs/`: `cls_openearthmap_sar.txt`, `cls_whu_sar.txt`, `cls_hrsid.txt`, `cls_pie_sar.txt`, `cls_fusar.txt`, `cls_yeseg_sar.txt`, `cls_ddhrnet_xian_sar.txt`.
+Or from Python:
+```python
+from pathlib import Path
+from pipeline import SegEarthPipeline
+from PIL import Image
+pipe = SegEarthPipeline(Path("."))
+image = Image.open("demo_YESeg-OPT-SAR/sar.png").convert("RGB")
+seg = pipe(image)
+```
+## Citation
+If you use this model, please cite the SegEarth-OV-2 paper:
+```bibtex
+@article{li2025segearthov2,
+  title={Annotation-Free Open-Vocabulary Segmentation for Remote-Sensing Images},
+  author={Li, Kaiyu and Cao, Xiangyong and Liu, Ruixun and Wang, Shihong and Jiang, Zixuan and Wang, Zhi and Meng, Deyu},
+  journal={arXiv preprint arXiv:2508.18067},
+  year={2025}
+}
+```
+## License
+MIT License (inherited from the original AlignEarth release).
+## Dependencies
+- `transformers`, `torch`, `torchvision`, `PIL`
+- Optional: `featup` for CUDA-accelerated JBU (falls back to pure PyTorch)
+- Optional: `mmcv` for CarafeUpsampler, `sapa` for SAPAUpsampler
+## Related
+- **Original weights**: [likyoo/AlignEarth-SAR-ViT-B-16](https://huggingface.co/likyoo/AlignEarth-SAR-ViT-B-16)
+- **Code**: [SegEarth-OV-2](https://github.com/earth-insights/SegEarth-OV-2)
+- **Paper**: [arXiv:2508.18067](https://arxiv.org/abs/2508.18067)

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/cls_openearthmap_sar.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+background
+bareland,barren
+grass
+pavement
+road
+tree,forest
+water,river
+cropland
+building,roof,house

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "architectures": [
+    "CLIPModel"
+  ],
+  "dtype": "float32",
+  "initializer_factor": 1.0,
+  "logit_scale_init_value": 2.6592,
+  "model_type": "clip",
+  "projection_dim": 512,
+  "text_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 512,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 2048,
+    "layer_norm_eps": 1e-05,
+    "max_position_embeddings": 77,
+    "model_type": "clip_text_model",
+    "num_attention_heads": 8,
+    "num_hidden_layers": 12,
+    "projection_dim": 512,
+    "vocab_size": 49408
+  },
+  "transformers_version": "4.57.3",
+  "vision_config": {
+    "attention_dropout": 0.0,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 768,
+    "image_size": 224,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-05,
+    "model_type": "clip_vision_model",
+    "num_attention_heads": 12,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "patch_size": 16,
+    "projection_dim": 512
+  }
+}

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_ddhrnet_xian_sar.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+building
+road
+farmland,grass
+greenery
+water

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_fusar.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+others
+water
+road
+building
+grass

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_hrsid.txt ADDED Viewed

	@@ -0,0 +1,2 @@


+background
+ship,boat

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_openearthmap_sar.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+background
+bareland,barren
+grass
+pavement
+road
+tree,forest
+water,river
+cropland
+building,roof,house

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_pie_sar.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+background
+city
+road
+water,river
+forest
+cropland

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_whu_sar.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+farmland
+city
+village
+water
+forest
+road
+others

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/configs/cls_yeseg_sar.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+bareground,barren
+grass,farmland
+dense tree cover
+city
+water
+roadway

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/conversion_meta.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "source_checkpoint": "/data/projects/models/hf_models/likyoo/AlignEarth-SAR-ViT-B-16/AlignEarth-SAR-ViT-B-16.pt",
+  "source_top_level_keys": [
+    "epoch",
+    "name",
+    "state_dict",
+    "optimizer",
+    "scaler"
+  ],
+  "converted_to": "transformers.CLIPModel",
+  "notes": "Mapped OpenCLIP-style state_dict to HF CLIPModel key format (vision+text)."
+}

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_YESeg-OPT-SAR/README.md ADDED Viewed

	@@ -0,0 +1,8 @@

+# Demo sample from YESeg-OPT-SAR dataset
+This folder contains a paired sample from the **YESeg-OPT-SAR** dataset:
+- `sar.png` – SAR image (input)
+- `rgb.png` – Optical RGB reference
+- `label.png` / `cvt_label.png` – Ground truth segmentation
+Use `configs/cls_yeseg_sar.txt` and `--prob-thd 0.3` when running on this demo.

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_YESeg-OPT-SAR/cvt_label.png ADDED Viewed

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_YESeg-OPT-SAR/label.png ADDED Viewed

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_YESeg-OPT-SAR/rgb.png ADDED Viewed

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_YESeg-OPT-SAR/sar.png ADDED Viewed

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/demo_result.png ADDED Viewed

Git LFS Details

SHA256: 67150d60f8d6993e88a7c0c0b873a71a43ce13430c8f986f05e3dad94d1acff9
Pointer size: 131 Bytes
Size of remote file: 654 kB

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/eval.py ADDED Viewed

	@@ -0,0 +1,163 @@

+"""
+Standalone evaluation script for AlignEarth segmentation.
+Evaluates on datasets with image/label pairs. Computes mIoU.
+Usage:
+    python eval.py --img-dir data/OpenEarthMap_SAR/test/sar_images \\
+                   --label-dir data/OpenEarthMap_SAR/test/labels \\
+                   --config configs/cls_openearthmap_sar.txt
+"""
+import argparse
+from pathlib import Path
+import numpy as np
+from PIL import Image
+try:
+    from tqdm import tqdm
+except ImportError:
+    def tqdm(x, **kw):
+        return x
+from pipeline import SegEarthPipeline
def load_label(path: Path, reduce_zero_label: bool = False) -> np.ndarray:
    """Load a segmentation label image as an int64 index map.

    Args:
        path: Label image file readable by PIL.
        reduce_zero_label: If True, shift labels ``1..N`` to ``0..N-1``.
            Pixels labelled 0 (and pixels already equal to 255) become the
            ignore value 255.

    Returns:
        The label array cast to ``np.int64``. For multi-channel images only
        the first channel is kept.
    """
    # Close the file handle deterministically instead of relying on GC.
    with Image.open(path) as img:
        arr = np.array(img)
    if arr.ndim == 3 and arr.shape[2] >= 1:
        # Multi-channel label: only the first channel is used as the index
        # map (any colour-to-index conversion is dataset-specific).
        arr = arr[:, :, 0]
    # Widen before arithmetic so the shift never depends on uint8 wrap-around.
    arr = arr.astype(np.int64)
    if reduce_zero_label:
        # Decide which pixels are ignored *before* shifting: label 0 is
        # dropped, and an existing 255 must stay 255 rather than become 254.
        ignore = (arr == 0) | (arr == 255)
        arr = arr - 1
        arr[ignore] = 255
    return arr
def compute_iou(pred: np.ndarray, gt: np.ndarray, num_classes: int, ignore_index: int = 255) -> np.ndarray:
    """Compute per-class intersection-over-union for one prediction.

    Args:
        pred: Predicted class indices.
        gt: Ground-truth class indices, same shape as ``pred``.
        num_classes: Number of classes; classes ``0..num_classes-1`` are scored.
        ignore_index: Ground-truth value marking pixels excluded from both
            intersection and union.

    Returns:
        Float array of length ``num_classes``. A class that has no valid
        ground-truth pixel gets ``nan`` so callers can skip it with
        ``np.nanmean``.

    Raises:
        ValueError: If ``pred`` and ``gt`` have different shapes.
    """
    pred = np.asarray(pred)
    gt = np.asarray(gt)
    if pred.shape != gt.shape:
        raise ValueError(f"Shape mismatch: pred {pred.shape} vs gt {gt.shape}")

    # Pixels whose ground truth is ignore_index never count, regardless of
    # what was predicted there.
    valid = gt != ignore_index
    ious = np.full(num_classes, np.nan, dtype=np.float64)
    for c in range(num_classes):
        gt_c = (gt == c) & valid
        if not gt_c.any():
            # Class absent from this ground truth: leave as nan.
            continue
        pred_c = (pred == c) & valid
        intersection = np.count_nonzero(pred_c & gt_c)
        # union >= count(gt_c) > 0 here, so the division is always defined.
        union = np.count_nonzero(pred_c | gt_c)
        ious[c] = intersection / union
    return ious
def main():
    """Run the evaluation from the command line and print mIoU.

    Parses arguments, builds a ``SegEarthPipeline`` rooted at this script's
    directory, predicts every image in ``--img-dir`` that has a matching label
    in ``--label-dir``, and prints the mean IoU and the per-class IoU.

    IoU is computed per image, averaged per class over images with
    ``np.nanmean``, and then averaged over classes.

    Raises:
        FileNotFoundError: If the image directory, label directory or class
            config is missing, or if no image matches ``--img-suffix``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--img-dir", required=True, help="Directory of images")
    parser.add_argument("--label-dir", required=True, help="Directory of labels")
    parser.add_argument("--config", default="configs/cls_openearthmap_sar.txt", help="Class config file")
    parser.add_argument("--img-suffix", default=".png", help="Image suffix")
    parser.add_argument("--label-suffix", default=".png", help="Label suffix")
    parser.add_argument("--reduce-zero-label", action="store_true", help="Label 1..N -> 0..N-1")
    parser.add_argument("--save-pred-dir", help="Save predictions to directory")
    parser.add_argument("--featup", default="jbu_one", help="SimFeatUp model")
    parser.add_argument("--slide-crop", type=int, default=0, help="Sliding window crop size (0=disabled)")
    parser.add_argument("--slide-stride", type=int, default=112, help="Sliding window stride")
    parser.add_argument("--cls-token-lambda", type=float, default=-0.3, help="Global Bias Alleviation")
    parser.add_argument("--logit-scale", type=float, default=50.0, help="Softmax temperature")
    parser.add_argument("--prob-thd", type=float, default=0.0, help="Low-confidence threshold")
    parser.add_argument("--limit", type=int, default=0, help="Limit number of samples (0=all)")
    parser.add_argument("--device", default="cuda", help="Device passed to the pipeline")
    args = parser.parse_args()

    # When tqdm is not installed the module-level fallback is a plain function
    # without a ``write`` attribute, so fall back to print for messages.
    log = getattr(tqdm, "write", print)

    repo_dir = Path(__file__).resolve().parent
    img_dir = Path(args.img_dir)
    label_dir = Path(args.label_dir)
    config_path = Path(args.config)
    if not config_path.is_absolute():
        # Relative configs are resolved against the script's own directory.
        config_path = repo_dir / config_path

    if not img_dir.exists():
        raise FileNotFoundError(f"Image dir not found: {img_dir}")
    if not label_dir.exists():
        raise FileNotFoundError(f"Label dir not found: {label_dir}")
    if not config_path.exists():
        raise FileNotFoundError(f"Config not found: {config_path}")

    # Collect images
    img_files = sorted(img_dir.glob(f"*{args.img_suffix}"))
    if args.limit:
        img_files = img_files[: args.limit]
    if not img_files:
        raise FileNotFoundError(f"No images in {img_dir}")

    # Build pipeline
    pipe = SegEarthPipeline(
        repo_dir,
        featup_model=args.featup,
        class_names_path=config_path,
        cls_token_lambda=args.cls_token_lambda,
        logit_scale=args.logit_scale,
        prob_thd=args.prob_thd,
        slide_crop=args.slide_crop,
        slide_stride=args.slide_stride,
        device=args.device,
    )
    num_classes = pipe.num_classes

    save_pred_dir = Path(args.save_pred_dir) if args.save_pred_dir else None
    if save_pred_dir:
        save_pred_dir.mkdir(parents=True, exist_ok=True)

    all_ious = []
    for img_path in tqdm(img_files, desc="Evaluating"):
        # First try "<stem><label-suffix>", then swap the image suffix for the
        # label suffix inside the full file name.
        label_path = label_dir / (img_path.stem + args.label_suffix)
        if not label_path.exists():
            label_path = label_dir / (img_path.name.replace(args.img_suffix, args.label_suffix))
        if not label_path.exists():
            log(f"Skipping {img_path.name}: no label")
            continue

        img = Image.open(img_path).convert("RGB")
        gt = load_label(label_path, reduce_zero_label=args.reduce_zero_label)
        pred = pipe(img)
        pred_np = pred.cpu().numpy()

        # Resize pred to match gt if needed (nearest keeps class indices intact)
        if pred_np.shape != gt.shape:
            pred_pil = Image.fromarray(pred_np.astype(np.uint8))
            pred_pil = pred_pil.resize((gt.shape[1], gt.shape[0]), Image.NEAREST)
            pred_np = np.array(pred_pil)

        # Mask ignore
        valid = gt != 255
        if valid.sum() == 0:
            continue
        pred_m = pred_np.copy()
        pred_m[~valid] = 255
        gt_m = gt.copy()
        gt_m[~valid] = 255

        ious = compute_iou(pred_m, gt_m, num_classes, ignore_index=255)
        all_ious.append(ious)

        if save_pred_dir:
            out_path = save_pred_dir / (img_path.stem + "_pred.png")
            Image.fromarray(pred_np.astype(np.uint8)).save(out_path)

    if not all_ious:
        print("No valid samples.")
        return

    all_ious = np.array(all_ious)
    # Per-class mean over images (nan = class absent in that image), then the
    # mean over classes.
    mean_iou = np.nanmean(all_ious, axis=0)
    miou = np.nanmean(mean_iou)
    print(f"mIoU: {miou:.4f}")
    print("Per-class IoU:", mean_iou)


if __name__ == "__main__":
    main()

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

OV-2/weights/backbone/AlignEarth-SAR-ViT-B-16/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:604f0111c635195ec1a723d6a256f476b2c272f330f186a8edeec9f81a4cb560
+size 598530372