TutlaytAI commited on
Commit
4ba2aed
·
verified ·
1 Parent(s): e33cbdd

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "upscaler",
3
+ "scale": 2,
4
+ "in_channels": 3,
5
+ "width": 32,
6
+ "num_blocks": 3,
7
+ "feat1": 64,
8
+ "feat2": 32,
9
+ "use_refine": false,
10
+ "auto_map": {
11
+ "AutoConfig": "configuration_upscaler.UpscalerConfig",
12
+ "AutoModel": "modeling_upscaler.UpscalerModel",
13
+ "AutoImageProcessor": "image_processing_upscaler.UpscalerImageProcessor"
14
+ }
15
+ }
configuration_upscaler.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import PretrainedConfig
2
+
3
class UpscalerConfig(PretrainedConfig):
    """Configuration for the two-stage super-resolution upscaler.

    Holds the architectural hyperparameters consumed by the model:
    the upscaling factor, channel widths for the restoration stage,
    feature widths for the ESPCN head, and the optional refinement flag.
    """

    model_type = "upscaler"

    def __init__(
        self,
        scale: int = 2,
        in_channels: int = 3,
        width: int = 32,
        num_blocks: int = 3,
        feat1: int = 64,
        feat2: int = 32,
        use_refine: bool = False,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Coerce every numeric field to a plain builtin int so the config
        # round-trips cleanly through JSON serialization.
        for attr, value in (
            ("scale", scale),
            ("in_channels", in_channels),
            ("width", width),
            ("num_blocks", num_blocks),
            ("feat1", feat1),
            ("feat2", feat2),
        ):
            setattr(self, attr, int(value))
        self.use_refine = bool(use_refine)
image_processing_upscaler.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional, Union
2
+
3
+ import numpy as np
4
+ import torch
5
+ from PIL import Image
6
+
7
+ from transformers import ImageProcessingMixin
8
+
9
+
10
def _to_rgb(img: Image.Image) -> Image.Image:
    """Return *img* in RGB mode, converting only when it is not already RGB."""
    return img if img.mode == "RGB" else img.convert("RGB")
14
+
15
+
16
class UpscalerImageProcessor(ImageProcessingMixin):
    """
    Minimal processor:
    - input: PIL or list of PIL
    - output: pixel_values float32 in [0,1], shape (B,3,H,W)
    No ImageNet normalization (recommended for SR trained on [0,1]).
    """

    model_input_names = ["pixel_values"]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _pil_to_tensor_01(self, img: Image.Image) -> torch.FloatTensor:
        """Convert one PIL image to a (3, H, W) float32 tensor in [0, 1]."""
        rgb = _to_rgb(img)
        # uint8 -> float32, rescaled from [0, 255] to [0, 1].
        pixels = np.array(rgb, dtype=np.float32) / 255.0  # (H, W, 3)
        # HWC -> CHW layout expected by the model.
        return torch.from_numpy(pixels).permute(2, 0, 1).contiguous()

    def __call__(
        self,
        images: Union[Image.Image, List[Image.Image]],
        return_tensors: Optional[str] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Batch one or more PIL images into a pixel_values tensor."""
        if isinstance(images, Image.Image):
            images = [images]

        # Stack (3, H, W) tensors into a (B, 3, H, W) batch; images are
        # assumed to share the same spatial size — TODO confirm with callers.
        batch = torch.stack(
            [self._pil_to_tensor_01(image) for image in images], dim=0
        )

        if return_tensors is None or return_tensors == "pt":
            return {"pixel_values": batch}
        raise ValueError("Only return_tensors=None or 'pt' is supported.")
last.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c37baae50cfbf1ff0bed65729f69c0a6f8855a9a6862a39a1ff13c42b51b4e7b
3
- size 59907923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:839eb5ff4bd63a27252729d11b98a36cadd142861a7b4a5afef032b5bcd61c90
3
+ size 1039253
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b3ac27502f1eebba4f5c3491b7ae35308663f07d73b92ce951156d5badc21a3
3
+ size 338252
modeling_upscaler.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+
8
+ from transformers import PreTrainedModel
9
+ from transformers.utils import ModelOutput
10
+
11
+ from configuration_upscaler import UpscalerConfig
12
+
13
+
14
+ # -------------------------
15
+ # Architecture (same as yours)
16
+ # -------------------------
17
+
18
class ResidualBlock(nn.Module):
    """Two 3x3 convolutions with a ReLU in between, added back onto the input."""

    def __init__(self, channels: int):
        super().__init__()
        # Attribute names (conv1/act/conv2) are part of the state-dict layout;
        # keep them stable so existing checkpoints load.
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
        self.act = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)

    def forward(self, x):
        """Return x plus the learned residual correction."""
        residual = self.conv2(self.act(self.conv1(x)))
        return x + residual
29
+
30
+
31
class RestorationNet(nn.Module):
    """Restoration stage: a small residual CNN that cleans the LR input."""

    def __init__(self, in_channels=3, width=32, num_blocks=3):
        super().__init__()
        self.in_conv = nn.Conv2d(in_channels, width, 3, padding=1)
        self.blocks = nn.Sequential(
            *(ResidualBlock(width) for _ in range(num_blocks))
        )
        self.out_conv = nn.Conv2d(width, in_channels, 3, padding=1)

    def forward(self, lr):
        """Return the LR input plus a learned correction (global skip)."""
        features = self.blocks(self.in_conv(lr))
        correction = self.out_conv(features)
        return lr + correction
42
+
43
+
44
class ESPCNUpsampler(nn.Module):
    """ESPCN-style sub-pixel upsampling head.

    Two feature convolutions followed by a convolution that expands the
    channel count to ``in_channels * scale**2`` and a PixelShuffle that
    rearranges those channels into a ``scale``-times larger image. An
    optional 3x3 refinement convolution can be applied to the shuffled
    output.

    Raises:
        ValueError: if ``scale`` is not 2, 3, or 4.
    """

    def __init__(self, in_channels=3, scale=2, feat1=64, feat2=32, use_refine=False):
        super().__init__()
        # Validate with a real exception: the original ``assert`` is stripped
        # under ``python -O`` and would let an invalid scale through, breaking
        # the conv3/PixelShuffle channel contract below.
        if scale not in (2, 3, 4):
            raise ValueError(f"scale must be 2, 3, or 4, got {scale!r}")

        self.conv1 = nn.Conv2d(in_channels, feat1, 5, padding=2)
        self.act1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(feat1, feat2, 3, padding=1)
        self.act2 = nn.ReLU(inplace=True)

        # IMPORTANT: conv3 out_channels depends on scale (PixelShuffle
        # consumes scale**2 channel groups per output channel).
        self.conv3 = nn.Conv2d(feat2, in_channels * (scale ** 2), 3, padding=1)
        self.ps = nn.PixelShuffle(scale)

        # Optional post-shuffle refinement conv; None keeps the state dict
        # free of unused parameters when disabled.
        self.refine = nn.Conv2d(in_channels, in_channels, 3, padding=1) if use_refine else None

    def forward(self, x):
        """Upscale (B, C, H, W) -> (B, C, H*scale, W*scale)."""
        y = self.act1(self.conv1(x))
        y = self.act2(self.conv2(y))
        y = self.ps(self.conv3(y))
        if self.refine is not None:
            y = self.refine(y)
        return y
66
+
67
+
68
class TwoStageSR(nn.Module):
    """Two-stage SR network: restoration CNN followed by an ESPCN upsampler."""

    def __init__(self, in_channels=3, scale=2, width=32, num_blocks=3, feat1=64, feat2=32, use_refine=False):
        super().__init__()
        self.scale = scale
        # Stage 1: denoise/restore at LR resolution.
        self.restoration = RestorationNet(
            in_channels=in_channels, width=width, num_blocks=num_blocks
        )
        # Stage 2: sub-pixel upsampling of the restored image.
        self.upsampler = ESPCNUpsampler(
            in_channels=in_channels,
            scale=scale,
            feat1=feat1,
            feat2=feat2,
            use_refine=use_refine,
        )

    def forward(self, lr):
        """Restore the LR input, then upscale it by ``self.scale``."""
        return self.upsampler(self.restoration(lr))
81
+
82
+
83
+ # -------------------------
84
+ # Transformers output
85
+ # -------------------------
86
+
87
@dataclass
class UpscalerOutput(ModelOutput):
    """Output container for :class:`UpscalerModel`."""

    # Super-resolved image batch; float tensor, presumably
    # (B, C, H*scale, W*scale) — shape comes from the wrapped TwoStageSR.
    sr: torch.FloatTensor
90
+
91
+
92
class UpscalerModel(PreTrainedModel):
    """Hugging Face wrapper around the two-stage super-resolution network."""

    config_class = UpscalerConfig
    main_input_name = "pixel_values"

    def __init__(self, config: UpscalerConfig):
        super().__init__(config)

        # Mirror the architectural hyperparameters from the config onto
        # the underlying TwoStageSR network.
        arch = {
            name: getattr(config, name)
            for name in (
                "in_channels",
                "scale",
                "width",
                "num_blocks",
                "feat1",
                "feat2",
                "use_refine",
            )
        }
        self.model = TwoStageSR(**arch)

        # init weights (optional; usually weights will be loaded)
        self.post_init()

    def forward(self, pixel_values: torch.FloatTensor, **kwargs) -> UpscalerOutput:
        """
        pixel_values: float tensor in [0,1], shape (B,3,H,W)
        returns: UpscalerOutput(sr=...)
        """
        return UpscalerOutput(sr=self.model(pixel_values))
preprocessor_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": false,
3
+ "do_rescale": true,
4
+ "rescale_factor": 0.00392156862745098
5
+ }