Upload 3 files

- .gitattributes +1 -0
- Style_Embedder_v3.safetensors_ +3 -0
- gallery_review.py +23 -7
- minimal_script.py +14 -8
.gitattributes
CHANGED

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Style_Embedder_v3.safetensors_ filter=lfs diff=lfs merge=lfs -text
Style_Embedder_v3.safetensors_
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ba7426f226395512745af91280683fa86a2931a82371cd3ff2beedc99e11b21
+size 155582960
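The new .gitattributes rule routes the checkpoint through Git LFS, so the committed blob is just the three-line pointer above rather than the ~156 MB weights file. Note that the committed filename ends in an underscore, while minimal_script.py below loads "Style_Embedder_v3.safetensors" without it, so the file presumably needs renaming after checkout (and a `git lfs pull` to materialise the weights). A minimal loading sketch under those assumptions, using the load_file import already present in both scripts:

from safetensors.torch import load_file
from minimal_script import EmbeddingNetwork

# Assumes `git lfs pull` has resolved the pointer into the real weights
# and the trailing underscore has been removed from the filename.
model = EmbeddingNetwork()
model.load_state_dict(load_file("Style_Embedder_v3.safetensors"))
model.eval()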
gallery_review.py
CHANGED

@@ -7,6 +7,7 @@ import imageio
 import random
 import matplotlib.pyplot as plt
 import cv2
+import skdim

 from torch.utils.data import Dataset, DataLoader

@@ -16,6 +17,7 @@ from safetensors.torch import save_file, load_file
 from sklearn.cluster import AgglomerativeClustering
 from sklearn.manifold import TSNE
 from sklearn.neighbors import KDTree
+from sklearn.preprocessing import StandardScaler

 from minimal_script import EmbeddingNetwork, closest_interval, adj_size, PLModule

@@ -126,9 +128,9 @@ def explore_embedding_space(embeddings, image_paths, model):
 def get_overlay_image(image_path):
     """Get image with gradient overlay"""
     img = Image.open(image_path).convert('RGB')
-    heatmap = compute_gradient_heatmap(image_path)
-    return overlay_heatmap(img, heatmap)
-
+    #heatmap = compute_gradient_heatmap(image_path)
+    #return overlay_heatmap(img, heatmap)
+    return img

 def add_caption_to_image(image, caption):
     """Add text caption to the bottom of an image"""

@@ -217,7 +219,7 @@ def explore_embedding_space(embeddings, image_paths, model):


 def generate_embeddings(image_folder, mode, model):
-    predict_dataset = PredictDataset(image_folder,
+    predict_dataset = PredictDataset(image_folder, 5000)
     predict_loader = DataLoader(predict_dataset, batch_size=1, num_workers=5, pin_memory=True)
     trainer = pl.Trainer(accelerator="gpu", logger=False, enable_checkpointing=False, precision="16-mixed")
     predictions_0 = trainer.predict(model, predict_loader)

@@ -227,6 +229,20 @@ def generate_embeddings(image_folder, mode, model):
         for i in pred[1]:
             paths.append(i)
     if mode == 'Grouping':
+        #estimate global intrinsic dimension
+        #scaler = StandardScaler()
+        #normalised_predictions = scaler.fit_transform(predictions)
+        # Initialize estimators
+        estimators = [skdim.id.TwoNN(), skdim.id.CorrInt(), skdim.id.DANCo()]
+        results = {}
+
+        for est in estimators:
+            est.fit(predictions)
+            results[type(est).__name__] = est.dimension_
+
+        print("Intrinsic Dimension Estimates:")
+        for name, dim in results.items():
+            print(f"{name}: {dim:.2f}")
         labels = cluster_embeddings(predictions)

         row_norms = np.linalg.norm(predictions, axis=1)

@@ -297,11 +313,11 @@ def cluster_embeddings(predictions, distance_threshold=32.0):


 if __name__ == '__main__':
-
-    folder = 'images_for_style_embedding'
+    folder = 'Enter Images folder name here'
+    #folder = 'images_for_style_embedding'
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model = PLModule()
     state_dict = load_file("Style_Embedder_v2.safetensors")
     model.network.load_state_dict(state_dict)
     # 'Grouping' or 'Explore'
-    generate_embeddings(folder, '
+    generate_embeddings(folder, 'Grouping', model)
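Beyond commenting out the gradient-heatmap overlay (get_overlay_image now returns the plain image), the substantive addition here is a global intrinsic-dimension estimate of the embedding cloud before clustering; the StandardScaler import and normalisation step are left commented out, so the estimators run on the raw embeddings. A self-contained sketch of that step, with random data standing in for the real embedding matrix (the three estimators match the commit; the data and its shape are illustrative):

import numpy as np
import skdim

# Stand-in for the `predictions` matrix built in generate_embeddings();
# shape is illustrative, the real one stacks the model's embeddings row-wise.
rng = np.random.default_rng(0)
predictions = rng.standard_normal((500, 32))

# Three independent estimators of the data's manifold dimension.
estimators = [skdim.id.TwoNN(), skdim.id.CorrInt(), skdim.id.DANCo()]
results = {}
for est in estimators:
    est.fit(predictions)  # sets est.dimension_, the scalar ID estimate
    results[type(est).__name__] = est.dimension_

print("Intrinsic Dimension Estimates:")
for name, dim in results.items():
    print(f"{name}: {dim:.2f}")

Agreement between the estimators is a useful sanity check: if TwoNN, CorrInt and DANCo report similar values well below the ambient embedding width, the embeddings occupy a much lower-dimensional manifold, which is relevant context for the clustering step that follows.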
minimal_script.py
CHANGED

@@ -18,7 +18,7 @@ class BasicBlock(nn.Module):
         for i in range(num_conv):
             layers.append(nn.Conv2d(channels[i], channels[i+1],
                           kernel_size=kernel_size, padding='same', padding_mode='reflect', bias=False))
-            layers.append(nn.
+            layers.append(nn.InstanceNorm2d(channels[i+1]))
             layers.append(nn.LeakyReLU(inplace=True))
             if dropout > 0.0:
                 layers.append(nn.Dropout2d(dropout))

@@ -33,7 +33,7 @@ class ResBlock(nn.Module):
         super().__init__()
         layers = []
         for i in range(num_conv):
-            layers.append(nn.
+            layers.append(nn.InstanceNorm2d(channels))
             if i == num_conv-1 and dropout > 0.0:
                 layers.append(nn.Dropout2d(dropout))
             layers.append(nn.LeakyReLU(inplace=True))

@@ -50,7 +50,7 @@ class ConvPool(nn.Module):
         super().__init__()
         layers = []
         layers.append(nn.Conv2d(in_channels, out_channels, 4, 2, 1, padding_mode='reflect', bias=False))
-        layers.append(nn.
+        layers.append(nn.InstanceNorm2d(out_channels))
         #layers.append(nn.LeakyReLU(inplace=True))
         self.operations = nn.Sequential(*layers)

@@ -95,13 +95,13 @@ class EmbeddingNetwork(nn.Module):
         self.conv4 = ResBlock(256, 3, 3)
         self.gram = CompactGramMatrix(256)
         self.compact = nn.Linear(256*(256+1)//2, 1024, bias=False)
-        self.conpactnorm = nn.LayerNorm(1024, elementwise_affine=
+        self.conpactnorm = nn.LayerNorm(1024, elementwise_affine=False)
         self.fc1 = nn.Linear(1024, 1024, bias=False)
-        self.fc1norm = nn.LayerNorm(1024, elementwise_affine=
+        self.fc1norm = nn.LayerNorm(1024, elementwise_affine=False)
         self.act = nn.LeakyReLU(inplace=True)
         self.fc2 = nn.Linear(1024, 1024, bias=False)
-        self.fc2norm = nn.LayerNorm(1024, elementwise_affine=
-        self.fc3 = nn.Linear(1024,
+        self.fc2norm = nn.LayerNorm(1024, elementwise_affine=False)
+        self.fc3 = nn.Linear(1024, 6)

     def forward(self, x):
         x = self.input_conv(x)

@@ -125,6 +125,12 @@ class PLModule(pl.LightningModule):
         super().__init__()
         self.save_hyperparameters()
         self.network = EmbeddingNetwork()
+        self.register_buffer("val_pos_sum", torch.tensor(0.0))
+        self.register_buffer("val_neg_sum", torch.tensor(0.0))
+        self.register_buffer("val_count", torch.tensor(0))
+        self.register_buffer("train_pos_sum", torch.tensor(0.0))
+        self.register_buffer("train_neg_sum", torch.tensor(0.0))
+        self.register_buffer("train_count", torch.tensor(0))

     def forward(self, x):
         return self.network(x)

@@ -159,7 +165,7 @@ if __name__ == '__main__':
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

     model = EmbeddingNetwork()
-    state_dict = load_file("
+    state_dict = load_file("Style_Embedder_v3.safetensors")
     model.load_state_dict(state_dict)

     model.to(device).to(torch.float16)
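A few themes run through these changes: every convolution is now followed by an nn.InstanceNorm2d (the replaced lines are truncated in the diff view, so the previous normalisation is not recoverable here); the LayerNorms become elementwise_affine=False, i.e. pure normalisation with no learned scale or shift; and PLModule gains registered buffers that look like running accumulators for train/val statistics (positive/negative sums and counts, presumably for a contrastive-style metric, though the loss code sits outside these hunks). A minimal sketch of the conv -> InstanceNorm2d -> LeakyReLU pattern the commit settles on (channel count illustrative):

import torch
import torch.nn as nn

# InstanceNorm2d normalises each channel of each sample over H x W
# independently, making activations insensitive to per-image contrast --
# a common choice in style-oriented networks.
block = nn.Sequential(
    nn.Conv2d(64, 64, kernel_size=3, padding='same', padding_mode='reflect', bias=False),
    nn.InstanceNorm2d(64),
    nn.LeakyReLU(inplace=True),
)

x = torch.randn(2, 64, 32, 32)
print(block(x).shape)  # torch.Size([2, 64, 32, 32])

Registering the accumulators with register_buffer, rather than as plain tensor attributes, keeps them on the module's device and includes them in checkpoints without making them trainable parameters.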