i4ata committed on
Commit
5feebb1
·
1 Parent(s): 4517cfd

smol update

Browse files
.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
__pycache__/model.cpython-310.pyc DELETED
Binary file (2.44 kB)
 
app.py CHANGED
@@ -1,34 +1,38 @@
 
 
 
1
  import gradio as gr
2
  from PIL import Image
3
  import os
4
-
5
- import torch
6
-
7
- from model import ClassifierModel
8
-
9
  from typing import List, Dict, Union
10
 
 
 
 
11
  class GradioApp:
12
 
13
  def __init__(self) -> None:
14
 
15
- self.models: Dict[str, Union[str, ClassifierModel]] = {
16
- 'Custom': 'models/my_vit.pth',
17
- 'Pretrained': 'models/pretrained_vit.pth'
 
 
 
 
 
 
 
 
18
  }
19
  with open('classname.txt') as f:
20
  self.classes: List[str] = [line.strip() for line in f.readlines()]
21
 
22
  def predict(self, img_file: str, model_name: str) -> Dict[str, float]:
23
 
24
- # Lazy loading of models
25
- if isinstance(self.models[model_name], str):
26
- self.models[model_name] = torch.load(self.models[model_name], map_location='cpu')
27
- self.models[model_name].eval()
28
-
29
- img = torch.unsqueeze(self.models[model_name].val_transform(Image.open(img_file)), 0)
30
  with torch.inference_mode():
31
- preds = torch.softmax(self.models[model_name](img), dim=1)[0].numpy()
32
  return dict(zip(self.classes, preds))
33
 
34
  def launch(self):
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torchvision import models
4
  import gradio as gr
5
  from PIL import Image
6
  import os
 
 
 
 
 
7
  from typing import List, Dict, Union
8
 
9
+ from custom_transformer.vit import ViT
10
+ from transforms import model_transforms
11
+
12
  class GradioApp:
13
 
14
  def __init__(self) -> None:
15
 
16
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
17
+
18
+ custom = ViT().to(device).eval()
19
+ custom.load_state_dict(torch.load('models/my_vit.pt', map_location=device))
20
+
21
+ pretrained = models.vit_b_16().to(device).eval()
22
+ pretrained.load_state_dict(torch.load('models/pretrained_vit.pt', map_location=device))
23
+
24
+ self.models: Dict[str, Union[str, nn.Module]] = {
25
+ 'Custom': custom,
26
+ 'Pretrained': pretrained
27
  }
28
  with open('classname.txt') as f:
29
  self.classes: List[str] = [line.strip() for line in f.readlines()]
30
 
31
  def predict(self, img_file: str, model_name: str) -> Dict[str, float]:
32
 
33
+ img = model_transforms[model_name](Image.open(img_file)).unsqueeze(0)
 
 
 
 
 
34
  with torch.inference_mode():
35
+ preds = torch.softmax(self.models[model_name](img)[0], dim=0)[0].cpu().numpy()
36
  return dict(zip(self.classes, preds))
37
 
38
  def launch(self):
custom_transformer/__pycache__/embedding.cpython-310.pyc DELETED
Binary file (3.07 kB)
 
custom_transformer/__pycache__/encoder.cpython-310.pyc DELETED
Binary file (4.55 kB)
 
custom_transformer/__pycache__/vit.cpython-310.pyc DELETED
Binary file (1.87 kB)
 
custom_transformer/embedding.py CHANGED
@@ -1,24 +1,15 @@
1
  import torch
2
  import torch.nn as nn
3
-
4
  import math
5
 
6
- # Use that for fancy colored prints
7
- from termcolor import colored
8
-
9
  DEBUG = False
10
 
11
  class PatchEmbedding(nn.Module):
12
 
13
  def __init__(self, in_channels: int = 3, embedding_dim: int = 768, patch_size: int = 16) -> None:
14
 
15
- super().__init__()
16
-
17
- # Linear projection:
18
  self.linear_projection = nn.Conv2d(in_channels=in_channels, out_channels=embedding_dim, kernel_size=patch_size, stride=patch_size)
19
-
20
- # Flattening:
21
- self.flatten = nn.Flatten(start_dim=2)
22
 
23
  def forward(self, x: torch.Tensor) -> torch.Tensor:
24
 
@@ -30,7 +21,7 @@ class PatchEmbedding(nn.Module):
30
  if DEBUG: print(f'Linearly projected input: {x.shape} [batch_size, embedding_dim, sqrt(n_patches), sqrt(n_patches)]')
31
 
32
  # Flattening: [batch_size, embedding_dim, n_patches]
33
- x = self.flatten(x)
34
  if DEBUG: print(f'Flattening of last 2 dimensions of linear projection: {x.shape} [batch_size, embedding_dim, n_patches]')
35
 
36
  # Transpose last 2 dimensions: [batch_size, n_patches, embedding_dim]
@@ -43,9 +34,9 @@ class Embedding(nn.Module):
43
 
44
  def __init__(self, image_size: int = 224, in_channels: int = 3, embedding_dim: int = 768, patch_size: int = 16) -> None:
45
 
46
- super().__init__()
47
 
48
- assert (image_size * image_size) % (patch_size * patch_size) == 0
49
 
50
  self.n_patches = (image_size * image_size) // (patch_size * patch_size)
51
  if DEBUG: print(f'Total number of patches: {self.n_patches}, i.e. {int(math.sqrt(self.n_patches))} x {int(math.sqrt(self.n_patches))}')
@@ -79,4 +70,4 @@ if __name__ == '__main__':
79
  sample_image_batch = torch.rand(5,3,224,224)
80
  embedding = Embedding()
81
  out = embedding(sample_image_batch)
82
- print(out)
 
1
  import torch
2
  import torch.nn as nn
 
3
  import math
4
 
 
 
 
5
  DEBUG = False
6
 
7
  class PatchEmbedding(nn.Module):
8
 
9
  def __init__(self, in_channels: int = 3, embedding_dim: int = 768, patch_size: int = 16) -> None:
10
 
11
+ super(PatchEmbedding, self).__init__()
 
 
12
  self.linear_projection = nn.Conv2d(in_channels=in_channels, out_channels=embedding_dim, kernel_size=patch_size, stride=patch_size)
 
 
 
13
 
14
  def forward(self, x: torch.Tensor) -> torch.Tensor:
15
 
 
21
  if DEBUG: print(f'Linearly projected input: {x.shape} [batch_size, embedding_dim, sqrt(n_patches), sqrt(n_patches)]')
22
 
23
  # Flattening: [batch_size, embedding_dim, n_patches]
24
+ x = x.flatten(start_dim=2)
25
  if DEBUG: print(f'Flattening of last 2 dimensions of linear projection: {x.shape} [batch_size, embedding_dim, n_patches]')
26
 
27
  # Transpose last 2 dimensions: [batch_size, n_patches, embedding_dim]
 
34
 
35
  def __init__(self, image_size: int = 224, in_channels: int = 3, embedding_dim: int = 768, patch_size: int = 16) -> None:
36
 
37
+ super(Embedding, self).__init__()
38
 
39
+ assert image_size % patch_size == 0
40
 
41
  self.n_patches = (image_size * image_size) // (patch_size * patch_size)
42
  if DEBUG: print(f'Total number of patches: {self.n_patches}, i.e. {int(math.sqrt(self.n_patches))} x {int(math.sqrt(self.n_patches))}')
 
70
  sample_image_batch = torch.rand(5,3,224,224)
71
  embedding = Embedding()
72
  out = embedding(sample_image_batch)
73
+ print(out.shape)
custom_transformer/encoder.py CHANGED
@@ -1,5 +1,6 @@
1
  import torch
2
  import torch.nn as nn
 
3
 
4
  DEBUG = False
5
 
@@ -7,7 +8,7 @@ class MultiHeadSelfAttention(nn.Module):
7
 
8
  def __init__(self, embedding_dim: int = 768, num_heads: int = 12) -> None:
9
 
10
- super().__init__()
11
 
12
  self.num_heads = num_heads
13
  self.head_dim = embedding_dim // num_heads
@@ -28,7 +29,7 @@ class MultiHeadSelfAttention(nn.Module):
28
  q, k, v = q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2)
29
  if DEBUG: print(f'Swap patches and head to have the head come first: {q.shape} [batch_size, num_heads, n_patches, head_dim]')
30
 
31
- attention_scores = torch.matmul(q, k.mT) / (self.head_dim ** .5)
32
  if DEBUG: print(f'Compute attention scores for each head (scaled dot product): {attention_scores.shape} [batch_size, num_heads, n_patches, n_patches]')
33
 
34
  attention_weights = torch.softmax(attention_scores, dim=-1)
@@ -43,7 +44,6 @@ class MultiHeadSelfAttention(nn.Module):
43
  weighted_sum = weighted_sum.view(*weighted_sum.shape[:-2], -1)
44
  if DEBUG: print(f'Recover the original dimensions by merging the last 2: {weighted_sum.shape} [batch_size, n_patches, embedding_dim]')
45
 
46
-
47
  output = self.out_w(weighted_sum)
48
  if DEBUG: print(f'(Output) Linear projection of the weighted sum: {output.shape} [batch_size, num_heads, n_patches, embedding_dim]')
49
 
@@ -53,7 +53,7 @@ class MultiHeadSelfAttention(nn.Module):
53
  class MSABlock(nn.Module):
54
 
55
  def __init__(self, embedding_dim: int = 768, num_heads: int = 12) -> None:
56
- super().__init__()
57
  self.msa = MultiHeadSelfAttention(embedding_dim=embedding_dim, num_heads=num_heads)
58
  self.layer_norm = nn.LayerNorm(normalized_shape=embedding_dim)
59
 
@@ -64,7 +64,7 @@ class MSABlock(nn.Module):
64
  class MLPBlock(nn.Module):
65
 
66
  def __init__(self, embedding_dim: int = 768, hidden_size: int = 3072) -> None:
67
- super().__init__()
68
  self.layer_norm = nn.LayerNorm(normalized_shape=embedding_dim)
69
  self.mlp = nn.Sequential(
70
  nn.Linear(in_features=embedding_dim, out_features=hidden_size),
@@ -79,7 +79,7 @@ class MLPBlock(nn.Module):
79
  class TransformerEncoderBlock(nn.Module):
80
 
81
  def __init__(self, embedding_dim: int = 768, hidden_size: int = 3072, num_heads: int = 12) -> None:
82
- super().__init__()
83
  self.msa = MSABlock(embedding_dim=embedding_dim, num_heads=num_heads)
84
  self.mlp = MLPBlock(embedding_dim=embedding_dim, hidden_size=hidden_size)
85
 
 
1
  import torch
2
  import torch.nn as nn
3
+ import math
4
 
5
  DEBUG = False
6
 
 
8
 
9
  def __init__(self, embedding_dim: int = 768, num_heads: int = 12) -> None:
10
 
11
+ super(MultiHeadSelfAttention, self).__init__()
12
 
13
  self.num_heads = num_heads
14
  self.head_dim = embedding_dim // num_heads
 
29
  q, k, v = q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2)
30
  if DEBUG: print(f'Swap patches and head to have the head come first: {q.shape} [batch_size, num_heads, n_patches, head_dim]')
31
 
32
+ attention_scores = torch.matmul(q, k.mT) / math.sqrt(self.head_dim)
33
  if DEBUG: print(f'Compute attention scores for each head (scaled dot product): {attention_scores.shape} [batch_size, num_heads, n_patches, n_patches]')
34
 
35
  attention_weights = torch.softmax(attention_scores, dim=-1)
 
44
  weighted_sum = weighted_sum.view(*weighted_sum.shape[:-2], -1)
45
  if DEBUG: print(f'Recover the original dimensions by merging the last 2: {weighted_sum.shape} [batch_size, n_patches, embedding_dim]')
46
 
 
47
  output = self.out_w(weighted_sum)
48
  if DEBUG: print(f'(Output) Linear projection of the weighted sum: {output.shape} [batch_size, num_heads, n_patches, embedding_dim]')
49
 
 
53
  class MSABlock(nn.Module):
54
 
55
  def __init__(self, embedding_dim: int = 768, num_heads: int = 12) -> None:
56
+ super(MSABlock, self).__init__()
57
  self.msa = MultiHeadSelfAttention(embedding_dim=embedding_dim, num_heads=num_heads)
58
  self.layer_norm = nn.LayerNorm(normalized_shape=embedding_dim)
59
 
 
64
  class MLPBlock(nn.Module):
65
 
66
  def __init__(self, embedding_dim: int = 768, hidden_size: int = 3072) -> None:
67
+ super(MLPBlock, self).__init__()
68
  self.layer_norm = nn.LayerNorm(normalized_shape=embedding_dim)
69
  self.mlp = nn.Sequential(
70
  nn.Linear(in_features=embedding_dim, out_features=hidden_size),
 
79
  class TransformerEncoderBlock(nn.Module):
80
 
81
  def __init__(self, embedding_dim: int = 768, hidden_size: int = 3072, num_heads: int = 12) -> None:
82
+ super(TransformerEncoderBlock, self).__init__()
83
  self.msa = MSABlock(embedding_dim=embedding_dim, num_heads=num_heads)
84
  self.mlp = MLPBlock(embedding_dim=embedding_dim, hidden_size=hidden_size)
85
 
custom_transformer/vit.py CHANGED
@@ -1,8 +1,6 @@
1
  import torch
2
  import torch.nn as nn
3
 
4
- import sys
5
- sys.path.append('..')
6
  from custom_transformer.embedding import Embedding
7
  from custom_transformer.encoder import TransformerEncoderBlock
8
 
@@ -18,7 +16,7 @@ class ViT(nn.Module):
18
  num_heads: int = 12,
19
  num_classes: int = 3) -> None:
20
 
21
- super().__init__()
22
 
23
  self.embedding = Embedding(image_size=image_size, in_channels=in_channels, embedding_dim=embedding_dim, patch_size=patch_size)
24
  self.transformer_encoders = nn.Sequential(
 
1
  import torch
2
  import torch.nn as nn
3
 
 
 
4
  from custom_transformer.embedding import Embedding
5
  from custom_transformer.encoder import TransformerEncoderBlock
6
 
 
16
  num_heads: int = 12,
17
  num_classes: int = 3) -> None:
18
 
19
+ super(ViT, self).__init__()
20
 
21
  self.embedding = Embedding(image_size=image_size, in_channels=in_channels, embedding_dim=embedding_dim, patch_size=patch_size)
22
  self.transformer_encoders = nn.Sequential(
model.py DELETED
@@ -1,49 +0,0 @@
1
- import lightning as L
2
- from lightning.pytorch.utilities.model_summary import ModelSummary
3
-
4
- import torch
5
- import torch.nn.functional as F
6
- import torch.nn as nn
7
-
8
- import torchmetrics
9
- from torchvision import transforms
10
-
11
- from typing import Optional
12
-
13
- class ClassifierModel(L.LightningModule):
14
-
15
- def __init__(self, model: nn.Module, image_size: int = 500, learning_rate: float = 1e-3, num_classes: int = 3,
16
- train_transform: Optional[transforms.Compose] = None, val_transform: Optional[transforms.Compose] = None) -> None:
17
- super().__init__()
18
- self.model = model
19
- self.learning_rate = learning_rate
20
- self.example_input_array = torch.Tensor(5, 3, image_size, image_size)
21
- self.f1_score = torchmetrics.F1Score(task='multiclass', num_classes=num_classes)
22
- self.train_transform = train_transform
23
- self.val_transform = val_transform
24
-
25
- def forward(self, x: torch.Tensor) -> torch.Tensor:
26
- return self.model(x)
27
-
28
- def print_summary(self) -> None:
29
- print(ModelSummary(self, max_depth=-1))
30
-
31
- def configure_optimizers(self) -> torch.optim.Optimizer:
32
- return torch.optim.Adam(params=self.model.parameters(), lr=self.learning_rate)
33
-
34
- def training_step(self, batch: tuple, batch_idx: int) -> float:
35
- X, y = batch
36
- y_pred = self(X)
37
- loss = F.cross_entropy(y_pred, y)
38
- self.log_dict({'Train loss': loss, f'Train F1 score': self.f1_score(y_pred, y)},
39
- on_step=False, on_epoch=True)
40
- return loss
41
-
42
- def validation_step(self, batch: tuple, batch_idx: int) -> float:
43
- X, y = batch
44
- y_pred = self(X)
45
- loss = F.cross_entropy(y_pred, y)
46
- self.log_dict({'Validation loss': loss, f'Validation F1 score': self.f1_score(y_pred, y)},
47
- on_step=False, on_epoch=True)
48
- return loss
49
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/{my_vit.pth → my_vit.pt} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aab24ef32f02959f02793f24eb8d1a87b7fe0bccd5989367b55b527e39bbb75a
3
- size 378692280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59a1ac6330e5dc1daaa83c6f4c46c1177e3543c24c165a269a49f6d02e0611c7
3
+ size 343273238
models/{pretrained_vit.pth → pretrained_vit.pt} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35925a38eead26593bb6b028900224ef80fe380334cb3c33cb4932af84f5f221
3
- size 346305720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06ff0b0f58ab0a686c5d3d752b691bb6dd722fa6dcc0e928d8a18328557ee559
3
+ size 343265286
transforms.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torchvision import transforms, models
2
+ from typing import Literal, Dict
3
+
4
+ _weights = models.ViT_B_16_Weights.DEFAULT
5
+
6
+ model_transforms: Dict[Literal['Custom', 'Pretrained'], transforms.Compose] = {
7
+ 'Custom': transforms.Compose([
8
+ transforms.Resize((224, 224)),
9
+ transforms.ToTensor()
10
+ ]),
11
+ 'Pretrained': _weights.transforms()
12
+ }