Fgdfgfthgr committed (verified)
Commit 6df49f3 · 1 Parent(s): a10ced8

Upload 3 files

Style_Embedder_v2.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70a87154bee75329ff204993e3ef1dea058534b6f5d1bead9cd9ffb7c2babc9a
+size 155617760
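The pointer above stores only the blob's SHA-256 and byte size; the weights themselves live in LFS. A minimal sketch (not part of the commit) for checking a downloaded copy against the pointer:

import hashlib

EXPECTED_OID = "70a87154bee75329ff204993e3ef1dea058534b6f5d1bead9cd9ffb7c2babc9a"
EXPECTED_SIZE = 155617760

def verify_lfs_blob(path):
    # Stream the file in 1 MiB chunks so the ~155 MB blob never sits in memory
    sha = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
            size += len(chunk)
    return sha.hexdigest() == EXPECTED_OID and size == EXPECTED_SIZE

print(verify_lfs_blob("Style_Embedder_v2.safetensors"))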
gallery_review.py CHANGED
@@ -12,25 +12,12 @@ from torch.utils.data import Dataset, DataLoader
 
 from PIL import Image
 from matplotlib import cm
-
-from minimal_script import EmbeddingNetworkSmall, closest_interval, adj_size
+from safetensors.torch import save_file, load_file
 from sklearn.cluster import AgglomerativeClustering
 from sklearn.manifold import TSNE
 from sklearn.neighbors import KDTree
 
-
-class PLModule(pl.LightningModule):
-    def __init__(self):
-        super().__init__()
-        self.save_hyperparameters()
-        self.network = EmbeddingNetworkSmall()
-
-    def forward(self, x):
-        return self.network(x)
-
-    def predict_step(self, batch, batch_idx, dataloader_idx=0):
-        outputs = self.forward(batch[0])
-        return outputs, batch[1]
+from minimal_script import EmbeddingNetwork, closest_interval, adj_size, PLModule
 
 
 class PredictDataset(Dataset):
@@ -101,7 +88,7 @@ def explore_embedding_space(embeddings, image_paths, model):
 
         # Move to GPU if available
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        img_tensor = img_tensor.to(device)
+        img_tensor = img_tensor.to(device).to(torch.float16)
 
         # Compute embedding and gradient
        with torch.enable_grad():
@@ -122,7 +109,7 @@ def explore_embedding_space(embeddings, image_paths, model):
        heatmap = cm.jet(grad_norm)[..., :3]  # Use jet colormap
        return heatmap
 
-    def overlay_heatmap(original_img, heatmap, alpha=0.6):
+    def overlay_heatmap(original_img, heatmap, alpha=0.4):
        """Overlay heatmap on original image"""
        # Resize heatmap to match original image
        heatmap_img = Image.fromarray((heatmap * 255).astype(np.uint8))
@@ -232,7 +219,7 @@ def explore_embedding_space(embeddings, image_paths, model):
 def generate_embeddings(image_folder, mode, model):
     predict_dataset = PredictDataset(image_folder, 1000)
     predict_loader = DataLoader(predict_dataset, batch_size=1, num_workers=5, pin_memory=True)
-    trainer = pl.Trainer(accelerator="gpu", logger=False, enable_checkpointing=False)
+    trainer = pl.Trainer(accelerator="gpu", logger=False, enable_checkpointing=False, precision="16-mixed")
     predictions_0 = trainer.predict(model, predict_loader)
     predictions = torch.cat([pred[0] for pred in predictions_0], dim=0).numpy()
     paths = []
@@ -250,7 +237,8 @@ def generate_embeddings(image_folder, mode, model):
     plt.ylabel('Average Norm')
     plt.title(f'Average Norm for Each Feature (Column)')
     plt.xticks(range(predictions.shape[1]))
-    plt.show()
+    #plt.show()
+    plt.savefig('Norms.png')
 
     plt.figure(figsize=(8, 6))
     tsne = TSNE(n_components=2, random_state=42)
@@ -263,7 +251,8 @@ def generate_embeddings(image_folder, mode, model):
     plt.legend()
     plt.grid(True)
     plt.axis('equal')
-    plt.show()
+    #plt.show()
+    plt.savefig('Groups.png')
 
     # List unique clusters
     unique_clusters = np.unique(labels)
@@ -291,12 +280,12 @@ def generate_embeddings(image_folder, mode, model):
 
         demo.launch()
     elif mode == 'Explore':
-        demo = explore_embedding_space(predictions, paths, model.to('cuda'))
+        demo = explore_embedding_space(predictions, paths, model.to('cuda').to(torch.float16))
         demo.launch()
 
 
 # Apply Agglomerative Clustering
-def cluster_embeddings(predictions, distance_threshold=6.0):
+def cluster_embeddings(predictions, distance_threshold=32.0):
     agg_clustering = AgglomerativeClustering(
         n_clusters=None,
         distance_threshold=distance_threshold,
@@ -308,9 +297,11 @@ def cluster_embeddings(predictions, distance_threshold=6.0):
 
 
 if __name__ == '__main__':
-    folder = 'Enter Images folder name here'
-    #folder = 'images_for_style_embedding'
+    #folder = 'Enter Images folder name here'
+    folder = 'images_for_style_embedding'
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model = PLModule.load_from_checkpoint('Style Embedder v1.ckpt')
+    model = PLModule()
+    state_dict = load_file("Style_Embedder_v2.safetensors")
+    model.network.load_state_dict(state_dict)
     # 'Grouping' or 'Explore'
-    generate_embeddings(folder, 'Grouping', model)
+    generate_embeddings(folder, 'Explore', model)
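The clustering threshold rises from 6.0 to 32.0 along with the rescaled network. A standalone sketch of how `cluster_embeddings` behaves, on synthetic 4-dim embeddings (matching the new fc3 output size); the function body past the lines shown above is elided, so returning `fit_predict` labels is an assumption:

import numpy as np
from sklearn.cluster import AgglomerativeClustering

def cluster_embeddings(predictions, distance_threshold=32.0):
    # n_clusters=None plus distance_threshold cuts the merge tree by distance,
    # so the number of style groups is data-driven rather than fixed
    agg_clustering = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=distance_threshold,
    )
    return agg_clustering.fit_predict(predictions)  # assumed return; body is elided in the hunk

# Two synthetic "styles": tight blobs separated by far more than the threshold
rng = np.random.default_rng(0)
emb = np.vstack([rng.normal(0.0, 0.1, (10, 4)),
                 rng.normal(8.0, 0.1, (10, 4))])
print(cluster_embeddings(emb))  # ten of one label, ten of the other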
minimal_script.py CHANGED
@@ -5,7 +5,9 @@ import numpy as np
 import torch.nn as nn
 import lightning.pytorch as pl
 import imageio
+import safetensors
 from torchvision.transforms import v2
+from safetensors.torch import save_file, load_file
 
 
 class BasicBlock(nn.Module):
@@ -16,7 +18,7 @@ class BasicBlock(nn.Module):
         for i in range(num_conv):
             layers.append(nn.Conv2d(channels[i], channels[i+1],
                           kernel_size=kernel_size, padding='same', padding_mode='reflect', bias=False))
-            layers.append(nn.InstanceNorm2d(channels[i+1]))
+            layers.append(nn.GroupNorm(1, channels[i+1]))
             layers.append(nn.LeakyReLU(inplace=True))
             if dropout > 0.0:
                 layers.append(nn.Dropout2d(dropout))
@@ -27,20 +29,20 @@ class BasicBlock(nn.Module):
 
 
 class ResBlock(nn.Module):
-    def __init__(self, channels, kernel_size=(3,3), num_conv=2, dropout=0.0):
+    def __init__(self, channels, kernel_size=3, num_conv=2, dropout=0.0):
         super().__init__()
         layers = []
         for i in range(num_conv):
+            layers.append(nn.GroupNorm(1, channels))
+            if i == num_conv-1 and dropout > 0.0:
+                layers.append(nn.Dropout2d(dropout))
+            layers.append(nn.LeakyReLU(inplace=True))
             layers.append(nn.Conv2d(channels, channels,
                           kernel_size=kernel_size, padding='same', padding_mode='reflect', bias=False))
-            layers.append(nn.InstanceNorm2d(channels))
-            layers.append(nn.LeakyReLU(inplace=True))
-        self.norm = nn.InstanceNorm2d(channels)
-        self.dropout = nn.Dropout2d(dropout) if dropout > 0 else nn.Identity()
         self.operations = nn.Sequential(*layers)
 
     def forward(self, x):
-        return self.dropout(self.norm(x + self.operations(x)))
+        return x + self.operations(x)
 
 
 class ConvPool(nn.Module):
@@ -48,8 +50,8 @@ class ConvPool(nn.Module):
         super().__init__()
         layers = []
         layers.append(nn.Conv2d(in_channels, out_channels, 4, 2, 1, padding_mode='reflect', bias=False))
-        layers.append(nn.InstanceNorm2d(out_channels, affine=False))
-        layers.append(nn.LeakyReLU(inplace=True))
+        layers.append(nn.GroupNorm(1, out_channels))
+        #layers.append(nn.LeakyReLU(inplace=True))
         self.operations = nn.Sequential(*layers)
 
     def forward(self, x):
@@ -80,27 +82,29 @@ class CompactGramMatrix(nn.Module):
         return compact_gram
 
 
-class EmbeddingNetworkSmall(nn.Module):
+class EmbeddingNetwork(nn.Module):
     def __init__(self):
-        super(EmbeddingNetworkSmall, self).__init__()
-        self.conv1 = BasicBlock((3, 8, 16, 24), (3, 3))
-        self.pool1 = ConvPool(24, 48)  # 2
-        self.conv2 = ResBlock(48, (3, 3), 3, 0.2)
-        self.pool2 = ConvPool(48, 96)  # 4
-        self.conv3 = ResBlock(96, (3, 3), 2, 0.25)
-        self.pool3 = ConvPool(96, 192)  # 8
-        self.conv4 = ResBlock(192, (3, 3), 2, 0.3)
-        self.gram = CompactGramMatrix(192)
-        self.compact = nn.Linear(192*(192+1)//2, 192, bias=False)
-        self.conpactnorm = nn.LayerNorm(192, elementwise_affine=False)
-        self.fc1 = nn.Linear(192, 192, bias=False)
-        self.fc1norm = nn.LayerNorm(192, elementwise_affine=False)
+        super(EmbeddingNetwork, self).__init__()
+        self.input_conv = nn.Conv2d(3, 32, 5, padding='same', padding_mode='reflect', bias=False)
+        self.conv1 = ResBlock(32, 3, 3)
+        self.pool1 = ConvPool(32, 64)  # 2
+        self.conv2 = ResBlock(64, 3, 3)
+        self.pool2 = ConvPool(64, 128)  # 4
+        self.conv3 = ResBlock(128, 3, 3)
+        self.pool3 = ConvPool(128, 256)  # 8
+        self.conv4 = ResBlock(256, 3, 3)
+        self.gram = CompactGramMatrix(256)
+        self.compact = nn.Linear(256*(256+1)//2, 1024, bias=False)
+        self.conpactnorm = nn.LayerNorm(1024, elementwise_affine=True)
+        self.fc1 = nn.Linear(1024, 1024, bias=False)
+        self.fc1norm = nn.LayerNorm(1024, elementwise_affine=True)
         self.act = nn.LeakyReLU(inplace=True)
-        self.fc2 = nn.Linear(192, 192, bias=False)
-        self.fc2norm = nn.LayerNorm(192, elementwise_affine=False)
-        self.fc3 = nn.Linear(192, 8)
+        self.fc2 = nn.Linear(1024, 1024, bias=False)
+        self.fc2norm = nn.LayerNorm(1024, elementwise_affine=True)
+        self.fc3 = nn.Linear(1024, 4)
 
     def forward(self, x):
+        x = self.input_conv(x)
         x = self.pool1(self.conv1(x))
         x = self.pool2(self.conv2(x))
         x = self.pool3(self.conv3(x))
@@ -120,13 +124,17 @@ class PLModule(pl.LightningModule):
     def __init__(self):
         super().__init__()
         self.save_hyperparameters()
-        self.network = EmbeddingNetworkSmall()
+        self.network = EmbeddingNetwork()
 
     def forward(self, x):
         return self.network(x)
 
+    def predict_step(self, batch, batch_idx, dataloader_idx=0):
+        outputs = self.forward(batch[0])
+        return outputs, batch[1]
+
 
-def adj_size(img, size=512):
+def adj_size(img, size=1536):
     h, w = img.shape[1], img.shape[2]
     area = h * w
     if area > size ** 2:
@@ -149,15 +157,19 @@ def closest_interval(img, interval=8):
 
 if __name__ == '__main__':
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model = PLModule.load_from_checkpoint('Style Embedder v1.ckpt')
-    model.to(device)
+
+    model = EmbeddingNetwork()
+    state_dict = load_file("Style_Embedder_v2.safetensors")
+    model.load_state_dict(state_dict)
+
+    model.to(device).to(torch.float16)
     model.eval()
 
     img = imageio.v3.imread('images_for_style_embedding/6857740.webp').copy()
     img = torch.from_numpy(img).permute(2, 0, 1)
     img = closest_interval(adj_size(img))
     img = 2*(img/255)-1
-    img = img.unsqueeze(0).to(device)
+    img = img.unsqueeze(0).to(device).to(torch.float16)
 
     pred = model(img)
-    print(pred)
+    print(pred)
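`CompactGramMatrix` itself falls outside every hunk, but the new sizes pin down its contract: a (N, 256, H, W) feature map in, 256*(256+1)//2 = 32,896 features out, i.e. the upper triangle (with diagonal) of the symmetric channel Gram matrix. A sketch consistent with those shapes, assuming normalization by spatial size; the committed implementation may differ:

import torch
import torch.nn as nn

class CompactGramMatrix(nn.Module):
    """Channel Gram matrix reduced to its upper triangle (shapes inferred from the diff)."""
    def __init__(self, channels):
        super().__init__()
        # Upper-triangle indices including the diagonal:
        # channels * (channels + 1) // 2 entries, matching self.compact's in_features
        self.register_buffer("idx", torch.triu_indices(channels, channels))

    def forward(self, x):
        n, c, h, w = x.shape
        feats = x.reshape(n, c, h * w)
        # Normalize by spatial size so the statistic is resolution-independent (assumed)
        gram = torch.bmm(feats, feats.transpose(1, 2)) / (h * w)
        return gram[:, self.idx[0], self.idx[1]]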