Upload 3 files

- Style Embedder v1.ckpt +3 -0
- gallery_review.py +4 -4
- minimal_script.py +61 -41
Style Embedder v1.ckpt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c438be321112c5fe5abfe25a5299058a952e44f92eede7ddb566b46a2eba270
+size 78835845
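The checkpoint enters the repo as a Git LFS pointer: the three added lines record only the spec version, the sha256 of the real file, and its size (about 79 MB), while the weights themselves live in LFS storage. A small sketch for verifying a downloaded copy against those pointer values; the local filename is an assumption (it matches the repo path here):

```python
import hashlib
from pathlib import Path

# Values copied from the LFS pointer above
EXPECTED_OID = "5c438be321112c5fe5abfe25a5299058a952e44f92eede7ddb566b46a2eba270"
EXPECTED_SIZE = 78835845

path = Path("Style Embedder v1.ckpt")  # assumed download location
assert path.stat().st_size == EXPECTED_SIZE, "size mismatch"

sha = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_OID, "hash mismatch"
print("checkpoint matches its LFS pointer")
```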
gallery_review.py
CHANGED
@@ -13,7 +13,7 @@ from torch.utils.data import Dataset, DataLoader
 from PIL import Image
 from matplotlib import cm

-from minimal_script import EmbeddingNetworkSmall, closest_interval,
+from minimal_script import EmbeddingNetworkSmall, closest_interval, adj_size
 from sklearn.cluster import AgglomerativeClustering
 from sklearn.manifold import TSNE
 from sklearn.neighbors import KDTree
@@ -50,7 +50,7 @@ class PredictDataset(Dataset):
         path = self.image_paths[idx]
         image = imageio.v3.imread(path).copy()
         image = torch.from_numpy(image).permute(2, 0, 1)
-        processed = closest_interval(
+        processed = closest_interval(adj_size(image, 1024))
         processed = 2*(processed/255)-1
         return processed.detach(), path

@@ -95,7 +95,7 @@ def explore_embedding_space(embeddings, image_paths, model):
     # Load and preprocess image
     img = imageio.v3.imread(image_path).copy()
     img = torch.from_numpy(img).permute(2, 0, 1)
-    img_tensor = closest_interval(
+    img_tensor = closest_interval(adj_size(img, 1024)).unsqueeze(0)
     img_tensor = 2*(img_tensor/255)-1
     img_tensor.requires_grad_(True)

@@ -311,6 +311,6 @@ if __name__ == '__main__':
     folder = 'Enter Images folder name here'
     #folder = 'images_for_style_embedding'
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model = PLModule.load_from_checkpoint('
+    model = PLModule.load_from_checkpoint('Style Embedder v1.ckpt')
     # 'Grouping' or 'Explore'
     generate_embeddings(folder, 'Grouping', model)
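Both call sites now route images through `adj_size(…, 1024)` before `closest_interval`, then scale from [0, 255] to [-1, 1] via `2*(x/255)-1`. A minimal sketch of that preprocessing path, assuming an RGB image at the hypothetical path `example.png`; judging by the `area > size ** 2` branch visible in `adj_size`, the 1024 argument caps the working resolution near 1024x1024 before `closest_interval` snaps the dimensions:

```python
import imageio.v3
import torch

from minimal_script import adj_size, closest_interval

img = imageio.v3.imread("example.png").copy()   # hypothetical RGB input, (H, W, 3) uint8
img = torch.from_numpy(img).permute(2, 0, 1)    # HWC -> CHW, as in PredictDataset
img = closest_interval(adj_size(img, 1024))     # shrink large images, then snap dims for the conv stack
img = 2 * (img / 255) - 1                       # map [0, 255] -> [-1, 1]
print(img.shape, float(img.min()), float(img.max()))
```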
minimal_script.py
CHANGED
@@ -9,15 +9,17 @@ from torchvision.transforms import v2


 class BasicBlock(nn.Module):
-    def __init__(self, channels, kernel_size=(3,3)):
+    def __init__(self, channels, kernel_size=(3,3), dropout=0.0):
         super().__init__()
         layers = []
         num_conv = len(channels)-1
         for i in range(num_conv):
             layers.append(nn.Conv2d(channels[i], channels[i+1],
                 kernel_size=kernel_size, padding='same', padding_mode='reflect', bias=False))
-            layers.append(nn.InstanceNorm2d(channels[i+1]
-            layers.append(nn.
+            layers.append(nn.InstanceNorm2d(channels[i+1]))
+            layers.append(nn.LeakyReLU(inplace=True))
+            if dropout > 0.0:
+                layers.append(nn.Dropout2d(dropout))
         self.operations = nn.Sequential(*layers)

     def forward(self, x):
@@ -25,70 +27,88 @@ class BasicBlock(nn.Module):


 class ResBlock(nn.Module):
-    def __init__(self,
+    def __init__(self, channels, kernel_size=(3,3), num_conv=2, dropout=0.0):
         super().__init__()
         layers = []
-        if in_channels == out_channels:
-            self.mapping = nn.Identity()
-        else:
-            self.mapping = nn.Conv2d(in_channels, out_channels, 1)
         for i in range(num_conv):
-            layers.append(nn.Conv2d(
+            layers.append(nn.Conv2d(channels, channels,
                 kernel_size=kernel_size, padding='same', padding_mode='reflect', bias=False))
-            layers.append(nn.InstanceNorm2d(
-            layers.append(nn.
+            layers.append(nn.InstanceNorm2d(channels))
+            layers.append(nn.LeakyReLU(inplace=True))
+        self.norm = nn.InstanceNorm2d(channels)
+        self.dropout = nn.Dropout2d(dropout) if dropout > 0 else nn.Identity()
         self.operations = nn.Sequential(*layers)

     def forward(self, x):
-        return (self.
+        return self.dropout(self.norm(x + self.operations(x)))


 class ConvPool(nn.Module):
     def __init__(self, in_channels, out_channels):
         super().__init__()
         layers = []
-        layers.append(nn.Conv2d(in_channels, out_channels, 4, 2, 1,
+        layers.append(nn.Conv2d(in_channels, out_channels, 4, 2, 1, padding_mode='reflect', bias=False))
         layers.append(nn.InstanceNorm2d(out_channels, affine=False))
-        layers.append(nn.
+        layers.append(nn.LeakyReLU(inplace=True))
         self.operations = nn.Sequential(*layers)

     def forward(self, x):
         return self.operations(x)


+class CompactGramMatrix(nn.Module):
+    def __init__(self, in_channels):
+        super().__init__()
+        self.in_channels = in_channels
+        # Precompute indices for lower triangle (including diagonal)
+        self.register_buffer('tril_indices',
+            torch.tril_indices(in_channels, in_channels, offset=0, dtype=torch.int32))
+
+    def forward(self, x):
+        """
+        Input: (B, C, H, W)
+        Output: (B, C*(C+1)//2) compact Gram features
+        """
+        b, c, h, w = x.size()
+        x = x.view(b, c, -1) / ((h * w) ** 0.5)  # Flatten spatial dimensions -> (B, C, H*W), then normalise
+
+        # Compute full Gram matrix (still needed temporarily)
+        gram = torch.bmm(x, x.transpose(1, 2))  # (B, C, C)
+
+        # Extract lower triangle including diagonal
+        compact_gram = gram[:, self.tril_indices[0], self.tril_indices[1]]  # (B, n_unique)
+        return compact_gram
+
+
 class EmbeddingNetworkSmall(nn.Module):
     def __init__(self):
         super(EmbeddingNetworkSmall, self).__init__()
-        self.conv1 = BasicBlock((3, 8, 16), (3, 3))
-        self.pool1 = ConvPool(
-        self.conv2 = ResBlock(
-        self.pool2 = ConvPool(
-        self.conv3 = ResBlock(
-        self.
-        self.
-        self.
-        self.
-        self.
-        self.
-        self.
-        self.
-        self.
-        self.
-        self.
-        self.fc2norm = nn.LayerNorm(128, elementwise_affine=False)
-        self.fc3 = nn.Linear(128, 8)
-
-        self.use_checkpoint = False
+        self.conv1 = BasicBlock((3, 8, 16, 24), (3, 3))
+        self.pool1 = ConvPool(24, 48)  # 2
+        self.conv2 = ResBlock(48, (3, 3), 3, 0.2)
+        self.pool2 = ConvPool(48, 96)  # 4
+        self.conv3 = ResBlock(96, (3, 3), 2, 0.25)
+        self.pool3 = ConvPool(96, 192)  # 8
+        self.conv4 = ResBlock(192, (3, 3), 2, 0.3)
+        self.gram = CompactGramMatrix(192)
+        self.compact = nn.Linear(192*(192+1)//2, 192, bias=False)
+        self.conpactnorm = nn.LayerNorm(192, elementwise_affine=False)
+        self.fc1 = nn.Linear(192, 192, bias=False)
+        self.fc1norm = nn.LayerNorm(192, elementwise_affine=False)
+        self.act = nn.LeakyReLU(inplace=True)
+        self.fc2 = nn.Linear(192, 192, bias=False)
+        self.fc2norm = nn.LayerNorm(192, elementwise_affine=False)
+        self.fc3 = nn.Linear(192, 8)

     def forward(self, x):
         x = self.pool1(self.conv1(x))
         x = self.pool2(self.conv2(x))
-        x = self.pool3(self.
+        x = self.pool3(self.conv3(x))
         x = self.conv4(x)

-        x = self.
-        x = self.
-        x = self.
+        x = self.gram(x)
+        x = self.compact(x)
+        x = self.conpactnorm(x)
         x = self.act(self.fc1norm(self.fc1(x)))
         x = self.act(self.fc2norm(self.fc2(x)))
         x = self.fc3(x)
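A note on the sizes in the hunk above: a Gram matrix over C channel features is symmetric, so its lower triangle (diagonal included) holds C(C+1)/2 unique values; for C = 192 that is 18,528, exactly the input width of `self.compact`. A quick standalone check of that bookkeeping, and of the claim that the compact form loses nothing:

```python
import torch

C = 192
n_unique = C * (C + 1) // 2
print(n_unique)                               # 18528, the in_features of self.compact

x = torch.randn(2, C, 16 * 16)                # (B, C, H*W) flattened feature maps
gram = torch.bmm(x, x.transpose(1, 2))        # (B, C, C), symmetric by construction
tril = torch.tril_indices(C, C)
compact = gram[:, tril[0], tril[1]]           # (B, 18528), lower triangle incl. diagonal
print(compact.shape)

# Rebuild the full matrix from the compact form: nothing was lost
full = torch.zeros_like(gram)
full[:, tril[0], tril[1]] = compact
full = full + full.transpose(1, 2) - torch.diag_embed(full.diagonal(dim1=1, dim2=2))
print(torch.allclose(full, gram, atol=1e-5))  # True
```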
@@ -106,7 +126,7 @@ class PLModule(pl.LightningModule):
         return self.network(x)


-def
+def adj_size(img, size=512):
     h, w = img.shape[1], img.shape[2]
     area = h * w
     if area > size ** 2:
@@ -129,13 +149,13 @@ def closest_interval(img, interval=8):

 if __name__ == '__main__':
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model = PLModule.load_from_checkpoint('
+    model = PLModule.load_from_checkpoint('Style Embedder v1.ckpt')
     model.to(device)
     model.eval()

     img = imageio.v3.imread('images_for_style_embedding/6857740.webp').copy()
     img = torch.from_numpy(img).permute(2, 0, 1)
-    img = closest_interval(
+    img = closest_interval(adj_size(img))
     img = 2*(img/255)-1
     img = img.unsqueeze(0).to(device)

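End to end, the updated network maps an image to an 8-dimensional style embedding: three stride-2 `ConvPool` stages (the `# 2 / # 4 / # 8` comments) downsample by a factor of 8, matching `closest_interval`'s default `interval=8`, and the projected Gram features pass through 192-wide linear layers into `fc3`'s 8 outputs. A minimal shape trace on random input with an untrained network; no checkpoint is needed, and since the diff cuts off right after `x = self.fc3(x)`, the final `return` of that value is assumed:

```python
import torch

from minimal_script import EmbeddingNetworkSmall

net = EmbeddingNetworkSmall().eval()  # eval() disables the Dropout2d layers
x = torch.randn(1, 3, 256, 256)       # dims a multiple of 8, as closest_interval would enforce
with torch.no_grad():
    emb = net(x)
print(emb.shape)                      # expected: torch.Size([1, 8])
```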