Fgdfgfthgr committed on
Commit
a10ced8
·
verified ·
1 Parent(s): fbfa4f5

Upload 3 files

Browse files
Files changed (3) hide show
  1. Style Embedder v1.ckpt +3 -0
  2. gallery_review.py +4 -4
  3. minimal_script.py +61 -41
Style Embedder v1.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c438be321112c5fe5abfe25a5299058a952e44f92eede7ddb566b46a2eba270
3
+ size 78835845
gallery_review.py CHANGED
@@ -13,7 +13,7 @@ from torch.utils.data import Dataset, DataLoader
13
  from PIL import Image
14
  from matplotlib import cm
15
 
16
- from minimal_script import EmbeddingNetworkSmall, closest_interval, down_to_1k
17
  from sklearn.cluster import AgglomerativeClustering
18
  from sklearn.manifold import TSNE
19
  from sklearn.neighbors import KDTree
@@ -50,7 +50,7 @@ class PredictDataset(Dataset):
50
  path = self.image_paths[idx]
51
  image = imageio.v3.imread(path).copy()
52
  image = torch.from_numpy(image).permute(2, 0, 1)
53
- processed = closest_interval(down_to_1k(image, 1024))
54
  processed = 2*(processed/255)-1
55
  return processed.detach(), path
56
 
@@ -95,7 +95,7 @@ def explore_embedding_space(embeddings, image_paths, model):
95
  # Load and preprocess image
96
  img = imageio.v3.imread(image_path).copy()
97
  img = torch.from_numpy(img).permute(2, 0, 1)
98
- img_tensor = closest_interval(down_to_1k(img, 1024)).unsqueeze(0)
99
  img_tensor = 2*(img_tensor/255)-1
100
  img_tensor.requires_grad_(True)
101
 
@@ -311,6 +311,6 @@ if __name__ == '__main__':
311
  folder = 'Enter Images folder name here'
312
  #folder = 'images_for_style_embedding'
313
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
314
- model = PLModule.load_from_checkpoint('Final_8.ckpt')
315
  # 'Grouping' or 'Explore'
316
  generate_embeddings(folder, 'Grouping', model)
 
13
  from PIL import Image
14
  from matplotlib import cm
15
 
16
+ from minimal_script import EmbeddingNetworkSmall, closest_interval, adj_size
17
  from sklearn.cluster import AgglomerativeClustering
18
  from sklearn.manifold import TSNE
19
  from sklearn.neighbors import KDTree
 
50
  path = self.image_paths[idx]
51
  image = imageio.v3.imread(path).copy()
52
  image = torch.from_numpy(image).permute(2, 0, 1)
53
+ processed = closest_interval(adj_size(image, 1024))
54
  processed = 2*(processed/255)-1
55
  return processed.detach(), path
56
 
 
95
  # Load and preprocess image
96
  img = imageio.v3.imread(image_path).copy()
97
  img = torch.from_numpy(img).permute(2, 0, 1)
98
+ img_tensor = closest_interval(adj_size(img, 1024)).unsqueeze(0)
99
  img_tensor = 2*(img_tensor/255)-1
100
  img_tensor.requires_grad_(True)
101
 
 
311
  folder = 'Enter Images folder name here'
312
  #folder = 'images_for_style_embedding'
313
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
314
+ model = PLModule.load_from_checkpoint('Style Embedder v1.ckpt')
315
  # 'Grouping' or 'Explore'
316
  generate_embeddings(folder, 'Grouping', model)
minimal_script.py CHANGED
@@ -9,15 +9,17 @@ from torchvision.transforms import v2
9
 
10
 
11
  class BasicBlock(nn.Module):
12
- def __init__(self, channels, kernel_size=(3,3)):
13
  super().__init__()
14
  layers = []
15
  num_conv = len(channels)-1
16
  for i in range(num_conv):
17
  layers.append(nn.Conv2d(channels[i], channels[i+1],
18
  kernel_size=kernel_size, padding='same', padding_mode='reflect', bias=False))
19
- layers.append(nn.InstanceNorm2d(channels[i+1], affine=False))
20
- layers.append(nn.ReLU())
 
 
21
  self.operations = nn.Sequential(*layers)
22
 
23
  def forward(self, x):
@@ -25,70 +27,88 @@ class BasicBlock(nn.Module):
25
 
26
 
27
  class ResBlock(nn.Module):
28
- def __init__(self, in_channels, out_channels, kernel_size=(3,3), num_conv=2):
29
  super().__init__()
30
  layers = []
31
- if in_channels == out_channels:
32
- self.mapping = nn.Identity()
33
- else:
34
- self.mapping = nn.Conv2d(in_channels, out_channels, 1)
35
  for i in range(num_conv):
36
- layers.append(nn.Conv2d(in_channels if i == 0 else out_channels, out_channels,
37
  kernel_size=kernel_size, padding='same', padding_mode='reflect', bias=False))
38
- layers.append(nn.InstanceNorm2d(out_channels, affine=False))
39
- layers.append(nn.ReLU())
 
 
40
  self.operations = nn.Sequential(*layers)
41
 
42
  def forward(self, x):
43
- return (self.mapping(x) + self.operations(x)) / math.sqrt(2)
44
 
45
 
46
  class ConvPool(nn.Module):
47
  def __init__(self, in_channels, out_channels):
48
  super().__init__()
49
  layers = []
50
- layers.append(nn.Conv2d(in_channels, out_channels, 4, 2, 1, bias=False, padding_mode='reflect'))
51
  layers.append(nn.InstanceNorm2d(out_channels, affine=False))
52
- layers.append(nn.ReLU(inplace=True))
53
  self.operations = nn.Sequential(*layers)
54
 
55
  def forward(self, x):
56
  return self.operations(x)
57
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  class EmbeddingNetworkSmall(nn.Module):
60
  def __init__(self):
61
  super(EmbeddingNetworkSmall, self).__init__()
62
- self.conv1 = BasicBlock((3, 8, 16), (3, 3))
63
- self.pool1 = ConvPool(16, 32) # 2
64
- self.conv2 = ResBlock(32, 32, (3, 3), 3)
65
- self.pool2 = ConvPool(32, 64) # 4
66
- self.conv3 = ResBlock(64, 64, (3, 3), 3)
67
- self.drop1 = nn.Dropout2d(p=0.25)
68
- self.pool3 = ConvPool(64, 128) # 8
69
- self.conv4 = ResBlock(128, 128, (3, 3), 3)
70
- self.adpool = nn.AdaptiveAvgPool2d(1)
71
- self.poolnorm = nn.LayerNorm(128, elementwise_affine=False)
72
- self.flatten = nn.Flatten()
73
- self.drop2 = nn.Dropout(p=0.33)
74
- self.fc1 = nn.Linear(128, 128, bias=False)
75
- self.fc1norm = nn.LayerNorm(128, elementwise_affine=False)
76
- self.act = nn.ReLU()
77
- self.fc2 = nn.Linear(128, 128, bias=False)
78
- self.fc2norm = nn.LayerNorm(128, elementwise_affine=False)
79
- self.fc3 = nn.Linear(128, 8)
80
-
81
- self.use_checkpoint = False
82
 
83
  def forward(self, x):
84
  x = self.pool1(self.conv1(x))
85
  x = self.pool2(self.conv2(x))
86
- x = self.pool3(self.drop1(self.conv3(x)))
87
  x = self.conv4(x)
88
 
89
- x = self.adpool(x)
90
- x = self.poolnorm(self.flatten(x))
91
- x = self.act(self.drop2(x))
92
  x = self.act(self.fc1norm(self.fc1(x)))
93
  x = self.act(self.fc2norm(self.fc2(x)))
94
  x = self.fc3(x)
@@ -106,7 +126,7 @@ class PLModule(pl.LightningModule):
106
  return self.network(x)
107
 
108
 
109
- def down_to_1k(img, size=1024):
110
  h, w = img.shape[1], img.shape[2]
111
  area = h * w
112
  if area > size ** 2:
@@ -129,13 +149,13 @@ def closest_interval(img, interval=8):
129
 
130
  if __name__ == '__main__':
131
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
132
- model = PLModule.load_from_checkpoint('Final_8.ckpt')
133
  model.to(device)
134
  model.eval()
135
 
136
  img = imageio.v3.imread('images_for_style_embedding/6857740.webp').copy()
137
  img = torch.from_numpy(img).permute(2, 0, 1)
138
- img = closest_interval(down_to_1k(img))
139
  img = 2*(img/255)-1
140
  img = img.unsqueeze(0).to(device)
141
 
 
9
 
10
 
11
  class BasicBlock(nn.Module):
12
+ def __init__(self, channels, kernel_size=(3,3), dropout=0.0):
13
  super().__init__()
14
  layers = []
15
  num_conv = len(channels)-1
16
  for i in range(num_conv):
17
  layers.append(nn.Conv2d(channels[i], channels[i+1],
18
  kernel_size=kernel_size, padding='same', padding_mode='reflect', bias=False))
19
+ layers.append(nn.InstanceNorm2d(channels[i+1]))
20
+ layers.append(nn.LeakyReLU(inplace=True))
21
+ if dropout > 0.0:
22
+ layers.append(nn.Dropout2d(dropout))
23
  self.operations = nn.Sequential(*layers)
24
 
25
  def forward(self, x):
 
27
 
28
 
29
  class ResBlock(nn.Module):
30
+ def __init__(self, channels, kernel_size=(3,3), num_conv=2, dropout=0.0):
31
  super().__init__()
32
  layers = []
 
 
 
 
33
  for i in range(num_conv):
34
+ layers.append(nn.Conv2d(channels, channels,
35
  kernel_size=kernel_size, padding='same', padding_mode='reflect', bias=False))
36
+ layers.append(nn.InstanceNorm2d(channels))
37
+ layers.append(nn.LeakyReLU(inplace=True))
38
+ self.norm = nn.InstanceNorm2d(channels)
39
+ self.dropout = nn.Dropout2d(dropout) if dropout > 0 else nn.Identity()
40
  self.operations = nn.Sequential(*layers)
41
 
42
  def forward(self, x):
43
+ return self.dropout(self.norm(x + self.operations(x)))
44
 
45
 
46
  class ConvPool(nn.Module):
47
  def __init__(self, in_channels, out_channels):
48
  super().__init__()
49
  layers = []
50
+ layers.append(nn.Conv2d(in_channels, out_channels, 4, 2, 1, padding_mode='reflect', bias=False))
51
  layers.append(nn.InstanceNorm2d(out_channels, affine=False))
52
+ layers.append(nn.LeakyReLU(inplace=True))
53
  self.operations = nn.Sequential(*layers)
54
 
55
  def forward(self, x):
56
  return self.operations(x)
57
 
58
 
59
class CompactGramMatrix(nn.Module):
    """Turn a feature map into the unique entries of its channel Gram matrix.

    The Gram matrix is symmetric, so only its lower triangle (diagonal
    included) is returned, giving C*(C+1)//2 values per sample.

    Args:
        in_channels: number of channels C of the feature maps passed to
            ``forward``; it fixes the size of the precomputed index buffer.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.in_channels = in_channels
        # Precompute indices for the lower triangle (including the diagonal).
        # The buffer's dtype is left as int32, exactly as before.
        self.register_buffer(
            'tril_indices',
            torch.tril_indices(in_channels, in_channels, offset=0, dtype=torch.int32))

    def forward(self, x):
        """
        Input: (B, C, H, W)
        Output: (B, C*(C+1)//2) compact Gram features
        """
        b, c, h, w = x.size()
        # Flatten the spatial dimensions -> (B, C, H*W), then scale by
        # 1/sqrt(H*W) so the Gram entries are averages over positions.
        # reshape() is used instead of view() so inputs whose spatial strides
        # cannot be merged (e.g. a transposed tensor) are copied rather than
        # raising a RuntimeError.
        flat = x.reshape(b, c, -1) / ((h * w) ** 0.5)

        # Full Gram matrix (needed temporarily): (B, C, C)
        gram = torch.bmm(flat, flat.transpose(1, 2))

        # Index with int64 tensors, the index dtype accepted by advanced
        # indexing; the stored buffer itself is not modified.
        rows = self.tril_indices[0].long()
        cols = self.tril_indices[1].long()
        return gram[:, rows, cols]  # (B, n_unique)
81
+
82
+
83
  class EmbeddingNetworkSmall(nn.Module):
84
  def __init__(self):
85
  super(EmbeddingNetworkSmall, self).__init__()
86
+ self.conv1 = BasicBlock((3, 8, 16, 24), (3, 3))
87
+ self.pool1 = ConvPool(24, 48) # 2
88
+ self.conv2 = ResBlock(48, (3, 3), 3, 0.2)
89
+ self.pool2 = ConvPool(48, 96) # 4
90
+ self.conv3 = ResBlock(96, (3, 3), 2, 0.25)
91
+ self.pool3 = ConvPool(96, 192) # 8
92
+ self.conv4 = ResBlock(192, (3, 3), 2, 0.3)
93
+ self.gram = CompactGramMatrix(192)
94
+ self.compact = nn.Linear(192*(192+1)//2, 192, bias=False)
95
+ self.conpactnorm = nn.LayerNorm(192, elementwise_affine=False)
96
+ self.fc1 = nn.Linear(192, 192, bias=False)
97
+ self.fc1norm = nn.LayerNorm(192, elementwise_affine=False)
98
+ self.act = nn.LeakyReLU(inplace=True)
99
+ self.fc2 = nn.Linear(192, 192, bias=False)
100
+ self.fc2norm = nn.LayerNorm(192, elementwise_affine=False)
101
+ self.fc3 = nn.Linear(192, 8)
 
 
 
 
102
 
103
  def forward(self, x):
104
  x = self.pool1(self.conv1(x))
105
  x = self.pool2(self.conv2(x))
106
+ x = self.pool3(self.conv3(x))
107
  x = self.conv4(x)
108
 
109
+ x = self.gram(x)
110
+ x = self.compact(x)
111
+ x = self.conpactnorm(x)
112
  x = self.act(self.fc1norm(self.fc1(x)))
113
  x = self.act(self.fc2norm(self.fc2(x)))
114
  x = self.fc3(x)
 
126
  return self.network(x)
127
 
128
 
129
+ def adj_size(img, size=512):
130
  h, w = img.shape[1], img.shape[2]
131
  area = h * w
132
  if area > size ** 2:
 
149
 
150
  if __name__ == '__main__':
151
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
152
+ model = PLModule.load_from_checkpoint('Style Embedder v1.ckpt')
153
  model.to(device)
154
  model.eval()
155
 
156
  img = imageio.v3.imread('images_for_style_embedding/6857740.webp').copy()
157
  img = torch.from_numpy(img).permute(2, 0, 1)
158
+ img = closest_interval(adj_size(img))
159
  img = 2*(img/255)-1
160
  img = img.unsqueeze(0).to(device)
161