Feature Extraction
Transformers
Safetensors
custom_code
mranzinger committed on
Commit
668c73e
·
verified ·
1 Parent(s): 309430d
adaptor_attn.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+ import math
9
+ from typing import Dict, Optional
10
+
11
+ import torch
12
+ from torch import nn
13
+
14
+ from einops import rearrange
15
+ from timm.models.vision_transformer import Block
16
+
17
+ from .enable_spectral_reparam import disable_spectral_reparam, enable_spectral_reparam
18
+ from .adaptor_base import AdaptorModuleBase
19
+ from .adaptor_mlp import MLP2
20
+
21
+
22
+ class AttnFDHead(AdaptorModuleBase):
23
+ def __init__(
24
+ self,
25
+ input_size: int,
26
+ hidden_size: int,
27
+ output_size: int,
28
+ num_inner: int = 0,
29
+ pre_norm: bool = False,
30
+ device: torch.device = None,
31
+ upsample_factor: int = 1,
32
+ upsample_rank: int = 0,
33
+ **kwargs # Ignore kwargs that might be to other "mlp" versions, e.g. teacher_summary_idxs
34
+ ) -> None:
35
+ super().__init__(requires_summary_and_spatial=False)
36
+ from timm.models.vision_transformer import Block
37
+ self.blocks = nn.Sequential(*[
38
+ Block(input_size, num_heads=16, init_values=1e-5)
39
+ for _ in range(2)
40
+ ])
41
+ self.mlp = MLP2(input_size, hidden_size, output_size,
42
+ num_inner=0, pre_norm=pre_norm, device=device,
43
+ upsample_factor=upsample_factor, upsample_rank=upsample_rank, **kwargs)
44
+
45
+ def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
46
+ x = self.blocks(x)
47
+ x = self.mlp(x)
48
+ return x
adaptor_base.py CHANGED
@@ -32,6 +32,19 @@ class RadioOutput(NamedTuple):
32
  )
33
 
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  class AdaptorBase(nn.Module):
36
  def forward(self, input: AdaptorInput) -> RadioOutput:
37
  raise NotImplementedError("Subclasses must implement this!")
 
32
  )
33
 
34
 
35
+ class AdaptorModuleBase(nn.Module):
36
+ def __init__(
37
+ self,
38
+ requires_summary_and_spatial: bool,
39
+ handles_summary_and_spatial: bool = False
40
+ ) -> None:
41
+ super().__init__()
42
+ self.requires_summary_and_spatial = requires_summary_and_spatial
43
+ self.handles_summary_and_spatial = handles_summary_and_spatial
44
+
45
+ assert not handles_summary_and_spatial or requires_summary_and_spatial, "If handles summary and spatial, must require it too!"
46
+
47
+
48
  class AdaptorBase(nn.Module):
49
  def forward(self, input: AdaptorInput) -> RadioOutput:
50
  raise NotImplementedError("Subclasses must implement this!")
adaptor_generic.py CHANGED
@@ -12,7 +12,7 @@ from torch import nn
12
  import torch.nn.functional as F
13
 
14
  from .adaptor_base import AdaptorBase, AdaptorInput, RadioOutput
15
- from .adaptor_mlp import create_mlp_from_state, create_mlp_from_config
16
 
17
 
18
  class GenericAdaptor(AdaptorBase):
 
12
  import torch.nn.functional as F
13
 
14
  from .adaptor_base import AdaptorBase, AdaptorInput, RadioOutput
15
+ from .adaptor_module_factory import create_mlp_from_state, create_mlp_from_config
16
 
17
 
18
  class GenericAdaptor(AdaptorBase):
adaptor_mlp.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
2
  #
3
  # NVIDIA CORPORATION and its licensors retain all intellectual property
4
  # and proprietary rights in and to this software, related documentation
@@ -15,21 +15,10 @@ from einops import rearrange
15
  from timm.models.vision_transformer import Block
16
 
17
  from .enable_spectral_reparam import disable_spectral_reparam, enable_spectral_reparam
 
18
 
19
 
20
- class MLPBase(nn.Module):
21
- def __init__(
22
- self,
23
- requires_summary_and_spatial: bool,
24
- handles_summary_and_spatial: bool = False
25
- ) -> None:
26
- super().__init__()
27
- self.requires_summary_and_spatial = requires_summary_and_spatial
28
- self.handles_summary_and_spatial = handles_summary_and_spatial
29
-
30
- assert not handles_summary_and_spatial or requires_summary_and_spatial, "If handles summary and spatial, must require it too!"
31
-
32
- class MLP(MLPBase):
33
  def __init__(self, input_size: int, hidden_size: int, output_size: int,
34
  num_inner: int = 0, device: torch.device = None, **kwargs):
35
  super(MLP, self).__init__(requires_summary_and_spatial=False)
@@ -60,7 +49,7 @@ class MLP(MLPBase):
60
  return x
61
 
62
 
63
- class MLP2(MLPBase):
64
  def __init__(self, input_size: int, hidden_size: int, output_size: int,
65
  num_inner: int = 0,
66
  pre_norm: bool = False, device: torch.device = None,
@@ -118,109 +107,3 @@ class MLP2(MLPBase):
118
  c=self._real_output_dim)
119
 
120
  return x
121
-
122
-
123
- class AttnFDHead(MLPBase):
124
- def __init__(
125
- self,
126
- input_size: int,
127
- hidden_size: int,
128
- output_size: int,
129
- num_inner: int = 0,
130
- pre_norm: bool = False,
131
- device: torch.device = None,
132
- upsample_factor: int = 1,
133
- upsample_rank: int = 0,
134
- **kwargs # Ignore kwargs that might be to other "mlp" verions, e.g. teacher_summary_idxs
135
- ) -> None:
136
- super().__init__(requires_summary_and_spatial=False)
137
- from timm.models.vision_transformer import Block
138
- self.blocks = nn.Sequential(*[
139
- Block(input_size, num_heads=16, init_values=1e-5)
140
- for _ in range(2)
141
- ])
142
- self.mlp = MLP2(input_size, hidden_size, output_size,
143
- num_inner=0, pre_norm=pre_norm, device=device,
144
- upsample_factor=upsample_factor, upsample_rank=upsample_rank, **kwargs)
145
-
146
- def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
147
- x = self.blocks(x)
148
- x = self.mlp(x)
149
- return x
150
-
151
-
152
- MLP_SUMMARY_FACTORY = {
153
- 'v1': MLP,
154
- 'v2': MLP2,
155
- }
156
-
157
- MLP_FD_FACTORY = {
158
- 'v1': MLP,
159
- 'v2': MLP2,
160
- 'attn': AttnFDHead,
161
- }
162
-
163
-
164
- def strip_prefix(state: Dict[str, torch.Tensor], prefix: str):
165
- state = {
166
- k[len(prefix):]: v
167
- for k, v in state.items()
168
- if k.startswith(prefix)
169
- }
170
- return state
171
-
172
-
173
- def get_mlp_info_from_state(version: str, state: Dict[str, torch.Tensor], prefix: str = '', spectral_weights: bool = False):
174
- state = strip_prefix(state, prefix)
175
-
176
- weight_suffix = 'weight' if not spectral_weights else 'parametrizations.weight.original'
177
-
178
- if version == 'v1':
179
- hidden_dim, input_dim = state[f'fc1.{weight_suffix}'].shape
180
- output_dim = state[f'fc2.{weight_suffix}'].shape[0]
181
-
182
- for num_inner in range(1000):
183
- k = f'inner.{num_inner}.0.weight'
184
- if k not in state:
185
- break
186
- elif version == 'v2':
187
- hidden_dim, input_dim = state[f'fc1.{weight_suffix}'].shape
188
- output_dim = state[f'final.2.{weight_suffix}'].shape[0]
189
-
190
- for num_inner in range(1000):
191
- k = f'blocks.{num_inner}.0.weight'
192
- if k not in state:
193
- break
194
- elif version == 'attn':
195
- hidden_dim, input_dim = state[f'mlp.fc1.{weight_suffix}'].shape
196
- output_dim = state[f'mlp.final.2.{weight_suffix}'].shape[0]
197
- num_inner = 0
198
- else:
199
- raise ValueError(f'Unsupported MLP version: {version}')
200
-
201
- return input_dim, hidden_dim, output_dim, num_inner
202
-
203
-
204
- def create_mlp_from_config(version: str, input_dim: int, hidden_dim: int, output_dim: int, num_inner: int, is_summary: bool = True, **kwargs):
205
- factory = MLP_SUMMARY_FACTORY if is_summary else MLP_FD_FACTORY
206
-
207
- ret: nn.Module = factory[version](input_dim, hidden_dim, output_dim, num_inner, from_config=True, **kwargs)
208
-
209
- return ret
210
-
211
-
212
- def create_mlp_from_state(version: str, state: Dict[str, torch.Tensor], prefix: str = '', spectral_weights: bool = False, is_summary: bool = True, **kwargs):
213
- state = strip_prefix(state, prefix)
214
-
215
- input_dim, hidden_dim, output_dim, num_inner = get_mlp_info_from_state(version, state, spectral_weights=spectral_weights)
216
-
217
- ret: nn.Module = create_mlp_from_config(version, input_dim, hidden_dim, output_dim, num_inner, is_summary=is_summary, **kwargs)
218
- if spectral_weights:
219
- enable_spectral_reparam(ret, init_norm_to_current=False, state_dict_guidance=state)
220
-
221
- ret.load_state_dict(state)
222
-
223
- if spectral_weights:
224
- disable_spectral_reparam(ret)
225
-
226
- return ret
 
1
+ # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
2
  #
3
  # NVIDIA CORPORATION and its licensors retain all intellectual property
4
  # and proprietary rights in and to this software, related documentation
 
15
  from timm.models.vision_transformer import Block
16
 
17
  from .enable_spectral_reparam import disable_spectral_reparam, enable_spectral_reparam
18
+ from .adaptor_base import AdaptorModuleBase
19
 
20
 
21
+ class MLP(AdaptorModuleBase):
 
 
 
 
 
 
 
 
 
 
 
 
22
  def __init__(self, input_size: int, hidden_size: int, output_size: int,
23
  num_inner: int = 0, device: torch.device = None, **kwargs):
24
  super(MLP, self).__init__(requires_summary_and_spatial=False)
 
49
  return x
50
 
51
 
52
+ class MLP2(AdaptorModuleBase):
53
  def __init__(self, input_size: int, hidden_size: int, output_size: int,
54
  num_inner: int = 0,
55
  pre_norm: bool = False, device: torch.device = None,
 
107
  c=self._real_output_dim)
108
 
109
  return x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adaptor_module_factory.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+ import math
9
+ from typing import Dict, Optional
10
+
11
+ import torch
12
+ from torch import nn
13
+
14
+ from einops import rearrange
15
+ from timm.models.vision_transformer import Block
16
+
17
+ from .enable_spectral_reparam import disable_spectral_reparam, enable_spectral_reparam
18
+ from .adaptor_mlp import MLP, MLP2
19
+ from .adaptor_attn import AttnFDHead
20
+
21
+
22
+ MLP_SUMMARY_FACTORY = {
23
+ 'v1': MLP,
24
+ 'v2': MLP2,
25
+ }
26
+
27
+ MLP_FD_FACTORY = {
28
+ 'v1': MLP,
29
+ 'v2': MLP2,
30
+ 'attn': AttnFDHead,
31
+ }
32
+
33
+
34
+ def strip_prefix(state: Dict[str, torch.Tensor], prefix: str):
35
+ state = {
36
+ k[len(prefix):]: v
37
+ for k, v in state.items()
38
+ if k.startswith(prefix)
39
+ }
40
+ return state
41
+
42
+
43
+ def get_mlp_info_from_state(version: str, state: Dict[str, torch.Tensor], prefix: str = '', spectral_weights: bool = False):
44
+ state = strip_prefix(state, prefix)
45
+
46
+ weight_suffix = 'weight' if not spectral_weights else 'parametrizations.weight.original'
47
+
48
+ if version == 'v1':
49
+ hidden_dim, input_dim = state[f'fc1.{weight_suffix}'].shape
50
+ output_dim = state[f'fc2.{weight_suffix}'].shape[0]
51
+
52
+ for num_inner in range(1000):
53
+ k = f'inner.{num_inner}.0.weight'
54
+ if k not in state:
55
+ break
56
+ elif version == 'v2':
57
+ hidden_dim, input_dim = state[f'fc1.{weight_suffix}'].shape
58
+ output_dim = state[f'final.2.{weight_suffix}'].shape[0]
59
+
60
+ for num_inner in range(1000):
61
+ k = f'blocks.{num_inner}.0.weight'
62
+ if k not in state:
63
+ break
64
+ elif version == 'attn':
65
+ hidden_dim, input_dim = state[f'mlp.fc1.{weight_suffix}'].shape
66
+ output_dim = state[f'mlp.final.2.{weight_suffix}'].shape[0]
67
+ num_inner = 0
68
+ else:
69
+ raise ValueError(f'Unsupported MLP version: {version}')
70
+
71
+ return input_dim, hidden_dim, output_dim, num_inner
72
+
73
+
74
+ def create_mlp_from_config(version: str, input_dim: int, hidden_dim: int, output_dim: int, num_inner: int, is_summary: bool = True, **kwargs):
75
+ factory = MLP_SUMMARY_FACTORY if is_summary else MLP_FD_FACTORY
76
+
77
+ ret: nn.Module = factory[version](input_dim, hidden_dim, output_dim, num_inner, from_config=True, **kwargs)
78
+
79
+ return ret
80
+
81
+
82
+ def create_mlp_from_state(version: str, state: Dict[str, torch.Tensor], prefix: str = '', spectral_weights: bool = False, is_summary: bool = True, **kwargs):
83
+ state = strip_prefix(state, prefix)
84
+
85
+ input_dim, hidden_dim, output_dim, num_inner = get_mlp_info_from_state(version, state, spectral_weights=spectral_weights)
86
+
87
+ ret: nn.Module = create_mlp_from_config(version, input_dim, hidden_dim, output_dim, num_inner, is_summary=is_summary, **kwargs)
88
+ if spectral_weights:
89
+ enable_spectral_reparam(ret, init_norm_to_current=False, state_dict_guidance=state)
90
+
91
+ ret.load_state_dict(state)
92
+
93
+ if spectral_weights:
94
+ disable_spectral_reparam(ret)
95
+
96
+ return ret
common.py CHANGED
@@ -146,7 +146,7 @@ RESOURCE_MAP = {
146
  "c-radio_v4-so400m": RadioResource(
147
  # NOTE: C-RADIO models are bound by different license terms than that present in the LICENSE file.
148
  # Please refer to the readme, or to https://huggingface.co/nvidia/C-RADIOv4-SO400M for more information.
149
- "https://huggingface.co/nvidia/C-RADIOv4-SO400M/resolve/main/c-radio-v4-so400m_half.pth.tar?download=true",
150
  patch_size=16,
151
  max_resolution=2048,
152
  preferred_resolution=Resolution(512, 512),
 
146
  "c-radio_v4-so400m": RadioResource(
147
  # NOTE: C-RADIO models are bound by different license terms than that present in the LICENSE file.
148
  # Please refer to the readme, or to https://huggingface.co/nvidia/C-RADIOv4-SO400M for more information.
149
+ "https://huggingface.co/nvidia/C-RADIOv4-SO400M/resolve/main/c-radio_v4-so400m_half.pth.tar?download=true",
150
  patch_size=16,
151
  max_resolution=2048,
152
  preferred_resolution=Resolution(512, 512),
hf_model.py CHANGED
@@ -25,7 +25,9 @@ from .common import RESOURCE_MAP, DEFAULT_VERSION
25
  # Import all required modules.
26
  from .adaptor_base import AdaptorBase, RadioOutput, AdaptorInput
27
  from .adaptor_generic import GenericAdaptor, AdaptorBase
28
- from .adaptor_mlp import create_mlp_from_config
 
 
29
  from .adaptor_registry import adaptor_registry
30
  from .cls_token import ClsToken
31
  from .dinov2_arch import dinov2_vitg14_reg
 
25
  # Import all required modules.
26
  from .adaptor_base import AdaptorBase, RadioOutput, AdaptorInput
27
  from .adaptor_generic import GenericAdaptor, AdaptorBase
28
+ from .adaptor_module_factory import create_mlp_from_config
29
+ from .adaptor_mlp import MLP, MLP2
30
+ from .adaptor_attn import AttnFDHead
31
  from .adaptor_registry import adaptor_registry
32
  from .cls_token import ClsToken
33
  from .dinov2_arch import dinov2_vitg14_reg