TutlaytAI commited on
Commit
4ba2aed
·
verified ·
1 Parent(s): e33cbdd

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "upscaler",
3
+ "scale": 2,
4
+ "in_channels": 3,
5
+ "width": 32,
6
+ "num_blocks": 3,
7
+ "feat1": 64,
8
+ "feat2": 32,
9
+ "use_refine": false,
10
+ "auto_map": {
11
+ "AutoConfig": "configuration_upscaler.UpscalerConfig",
12
+ "AutoModel": "modeling_upscaler.UpscalerModel",
13
+ "AutoImageProcessor": "image_processing_upscaler.UpscalerImageProcessor"
14
+ }
15
+ }
configuration_upscaler.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import PretrainedConfig
2
+
3
class UpscalerConfig(PretrainedConfig):
    """Configuration for the two-stage super-resolution upscaler.

    Holds the architectural hyperparameters consumed by the model:
    the upscaling factor, channel widths for the restoration stage,
    feature widths for the ESPCN head, and the optional refinement flag.
    """

    model_type = "upscaler"

    def __init__(
        self,
        scale: int = 2,
        in_channels: int = 3,
        width: int = 32,
        num_blocks: int = 3,
        feat1: int = 64,
        feat2: int = 32,
        use_refine: bool = False,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Coerce every numeric field to a plain builtin int so the config
        # round-trips cleanly through JSON serialization.
        for attr, value in (
            ("scale", scale),
            ("in_channels", in_channels),
            ("width", width),
            ("num_blocks", num_blocks),
            ("feat1", feat1),
            ("feat2", feat2),
        ):
            setattr(self, attr, int(value))
        self.use_refine = bool(use_refine)
image_processing_upscaler.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional, Union
2
+
3
+ import numpy as np
4
+ import torch
5
+ from PIL import Image
6
+
7
+ from transformers import ImageProcessingMixin
8
+
9
+
10
def _to_rgb(img: Image.Image) -> Image.Image:
    """Return *img* in RGB mode, converting only when it is not already RGB."""
    return img if img.mode == "RGB" else img.convert("RGB")
14
+
15
+
16
class UpscalerImageProcessor(ImageProcessingMixin):
    """
    Minimal processor:
    - input: PIL or list of PIL
    - output: pixel_values float32 in [0,1], shape (B,3,H,W)
    No ImageNet normalization (recommended for SR trained on [0,1]).
    """

    model_input_names = ["pixel_values"]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _pil_to_tensor_01(self, img: Image.Image) -> torch.FloatTensor:
        """Convert one PIL image to a (3, H, W) float32 tensor in [0, 1]."""
        rgb = _to_rgb(img)
        # uint8 -> float32, rescaled from [0, 255] to [0, 1].
        pixels = np.array(rgb, dtype=np.float32) / 255.0  # (H, W, 3)
        # HWC -> CHW layout expected by the model.
        return torch.from_numpy(pixels).permute(2, 0, 1).contiguous()

    def __call__(
        self,
        images: Union[Image.Image, List[Image.Image]],
        return_tensors: Optional[str] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Batch one or more PIL images into a pixel_values tensor."""
        if isinstance(images, Image.Image):
            images = [images]

        # Stack (3, H, W) tensors into a (B, 3, H, W) batch; images are
        # assumed to share the same spatial size — TODO confirm with callers.
        batch = torch.stack(
            [self._pil_to_tensor_01(image) for image in images], dim=0
        )

        if return_tensors is None or return_tensors == "pt":
            return {"pixel_values": batch}
        raise ValueError("Only return_tensors=None or 'pt' is supported.")
last.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c37baae50cfbf1ff0bed65729f69c0a6f8855a9a6862a39a1ff13c42b51b4e7b
3
- size 59907923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:839eb5ff4bd63a27252729d11b98a36cadd142861a7b4a5afef032b5bcd61c90
3
+ size 1039253
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b3ac27502f1eebba4f5c3491b7ae35308663f07d73b92ce951156d5badc21a3
3
+ size 338252
modeling_upscaler.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+
8
+ from transformers import PreTrainedModel
9
+ from transformers.utils import ModelOutput
10
+
11
+ from configuration_upscaler import UpscalerConfig
12
+
13
+
14
+ # -------------------------
15
+ # Architecture (same as yours)
16
+ # -------------------------
17
+
18
class ResidualBlock(nn.Module):
    """Two 3x3 convolutions with a ReLU in between, added back onto the input."""

    def __init__(self, channels: int):
        super().__init__()
        # Attribute names (conv1/act/conv2) are part of the state-dict layout;
        # keep them stable so existing checkpoints load.
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
        self.act = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)

    def forward(self, x):
        """Return x plus the learned residual correction."""
        residual = self.conv2(self.act(self.conv1(x)))
        return x + residual
29
+
30
+
31
class RestorationNet(nn.Module):
    """Restoration stage: a small residual CNN that cleans the LR input."""

    def __init__(self, in_channels=3, width=32, num_blocks=3):
        super().__init__()
        self.in_conv = nn.Conv2d(in_channels, width, 3, padding=1)
        self.blocks = nn.Sequential(
            *(ResidualBlock(width) for _ in range(num_blocks))
        )
        self.out_conv = nn.Conv2d(width, in_channels, 3, padding=1)

    def forward(self, lr):
        """Return the LR input plus a learned correction (global skip)."""
        features = self.blocks(self.in_conv(lr))
        correction = self.out_conv(features)
        return lr + correction
42
+
43
+
44
class ESPCNUpsampler(nn.Module):
    """ESPCN-style sub-pixel upsampling head.

    Two feature convolutions followed by a convolution that expands the
    channel count to ``in_channels * scale**2`` and a PixelShuffle that
    rearranges those channels into a ``scale``-times larger image. An
    optional 3x3 refinement convolution can be applied to the shuffled
    output.

    Raises:
        ValueError: if ``scale`` is not 2, 3, or 4.
    """

    def __init__(self, in_channels=3, scale=2, feat1=64, feat2=32, use_refine=False):
        super().__init__()
        # Validate with a real exception: the original ``assert`` is stripped
        # under ``python -O`` and would let an invalid scale through, breaking
        # the conv3/PixelShuffle channel contract below.
        if scale not in (2, 3, 4):
            raise ValueError(f"scale must be 2, 3, or 4, got {scale!r}")

        self.conv1 = nn.Conv2d(in_channels, feat1, 5, padding=2)
        self.act1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(feat1, feat2, 3, padding=1)
        self.act2 = nn.ReLU(inplace=True)

        # IMPORTANT: conv3 out_channels depends on scale (PixelShuffle
        # consumes scale**2 channel groups per output channel).
        self.conv3 = nn.Conv2d(feat2, in_channels * (scale ** 2), 3, padding=1)
        self.ps = nn.PixelShuffle(scale)

        # Optional post-shuffle refinement conv; None keeps the state dict
        # free of unused parameters when disabled.
        self.refine = nn.Conv2d(in_channels, in_channels, 3, padding=1) if use_refine else None

    def forward(self, x):
        """Upscale (B, C, H, W) -> (B, C, H*scale, W*scale)."""
        y = self.act1(self.conv1(x))
        y = self.act2(self.conv2(y))
        y = self.ps(self.conv3(y))
        if self.refine is not None:
            y = self.refine(y)
        return y
66
+
67
+
68
class TwoStageSR(nn.Module):
    """Two-stage SR network: restoration CNN followed by an ESPCN upsampler."""

    def __init__(self, in_channels=3, scale=2, width=32, num_blocks=3, feat1=64, feat2=32, use_refine=False):
        super().__init__()
        self.scale = scale
        # Stage 1: denoise/restore at LR resolution.
        self.restoration = RestorationNet(
            in_channels=in_channels, width=width, num_blocks=num_blocks
        )
        # Stage 2: sub-pixel upsampling of the restored image.
        self.upsampler = ESPCNUpsampler(
            in_channels=in_channels,
            scale=scale,
            feat1=feat1,
            feat2=feat2,
            use_refine=use_refine,
        )

    def forward(self, lr):
        """Restore the LR input, then upscale it by ``self.scale``."""
        return self.upsampler(self.restoration(lr))
81
+
82
+
83
+ # -------------------------
84
+ # Transformers output
85
+ # -------------------------
86
+
87
@dataclass
class UpscalerOutput(ModelOutput):
    """Output container for :class:`UpscalerModel`."""

    # Super-resolved image batch; float tensor, presumably
    # (B, C, H*scale, W*scale) — shape comes from the wrapped TwoStageSR.
    sr: torch.FloatTensor
90
+
91
+
92
class UpscalerModel(PreTrainedModel):
    """Hugging Face wrapper around the two-stage super-resolution network."""

    config_class = UpscalerConfig
    main_input_name = "pixel_values"

    def __init__(self, config: UpscalerConfig):
        super().__init__(config)

        # Mirror the architectural hyperparameters from the config onto
        # the underlying TwoStageSR network.
        arch = {
            name: getattr(config, name)
            for name in (
                "in_channels",
                "scale",
                "width",
                "num_blocks",
                "feat1",
                "feat2",
                "use_refine",
            )
        }
        self.model = TwoStageSR(**arch)

        # init weights (optional; usually weights will be loaded)
        self.post_init()

    def forward(self, pixel_values: torch.FloatTensor, **kwargs) -> UpscalerOutput:
        """
        pixel_values: float tensor in [0,1], shape (B,3,H,W)
        returns: UpscalerOutput(sr=...)
        """
        return UpscalerOutput(sr=self.model(pixel_values))
preprocessor_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": false,
3
+ "do_rescale": true,
4
+ "rescale_factor": 0.00392156862745098
5
+ }