smol update
- .gitignore +160 -0
- __pycache__/model.cpython-310.pyc +0 -0
- app.py +19 -15
- custom_transformer/__pycache__/embedding.cpython-310.pyc +0 -0
- custom_transformer/__pycache__/encoder.cpython-310.pyc +0 -0
- custom_transformer/__pycache__/vit.cpython-310.pyc +0 -0
- custom_transformer/embedding.py +5 -14
- custom_transformer/encoder.py +6 -6
- custom_transformer/vit.py +1 -3
- model.py +0 -49
- models/{my_vit.pth → my_vit.pt} +2 -2
- models/{pretrained_vit.pth → pretrained_vit.pt} +2 -2
- transforms.py +12 -0
.gitignore ADDED
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
__pycache__/model.cpython-310.pyc DELETED
Binary file (2.44 kB)
app.py CHANGED
@@ -1,34 +1,38 @@
+import torch
+import torch.nn as nn
+from torchvision import models
 import gradio as gr
 from PIL import Image
 import os
-
-import torch
-
-from model import ClassifierModel
-
 from typing import List, Dict, Union

+from custom_transformer.vit import ViT
+from transforms import model_transforms
+
 class GradioApp:

     def __init__(self) -> None:

-        …
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+        custom = ViT().to(device).eval()
+        custom.load_state_dict(torch.load('models/my_vit.pt', map_location=device))
+
+        pretrained = models.vit_b_16().to(device).eval()
+        pretrained.load_state_dict(torch.load('models/pretrained_vit.pt', map_location=device))
+
+        self.models: Dict[str, Union[str, nn.Module]] = {
+            'Custom': custom,
+            'Pretrained': pretrained
         }
         with open('classname.txt') as f:
             self.classes: List[str] = [line.strip() for line in f.readlines()]

     def predict(self, img_file: str, model_name: str) -> Dict[str, float]:

-        if isinstance(self.models[model_name], str):
-            self.models[model_name] = torch.load(self.models[model_name], map_location='cpu')
-            self.models[model_name].eval()
-
-        img = torch.unsqueeze(self.models[model_name].val_transform(Image.open(img_file)), 0)
+        img = model_transforms[model_name](Image.open(img_file)).unsqueeze(0)
         with torch.inference_mode():
-            preds = torch.softmax(self.models[model_name](img), dim=…
+            preds = torch.softmax(self.models[model_name](img)[0], dim=0).cpu().numpy()
         return dict(zip(self.classes, preds))

     def launch(self):
custom_transformer/__pycache__/embedding.cpython-310.pyc DELETED
Binary file (3.07 kB)

custom_transformer/__pycache__/encoder.cpython-310.pyc DELETED
Binary file (4.55 kB)

custom_transformer/__pycache__/vit.cpython-310.pyc DELETED
Binary file (1.87 kB)
custom_transformer/embedding.py CHANGED
@@ -1,24 +1,15 @@
 import torch
 import torch.nn as nn
-
 import math

-# Use that for fancy colored prints
-from termcolor import colored
-
 DEBUG = False

 class PatchEmbedding(nn.Module):

     def __init__(self, in_channels: int = 3, embedding_dim: int = 768, patch_size: int = 16) -> None:

-        super().__init__()
-
-        # Linear projection:
+        super(PatchEmbedding, self).__init__()
         self.linear_projection = nn.Conv2d(in_channels=in_channels, out_channels=embedding_dim, kernel_size=patch_size, stride=patch_size)
-
-        # Flattening:
-        self.flatten = nn.Flatten(start_dim=2)

     def forward(self, x: torch.Tensor) -> torch.Tensor:

@@ -30,7 +21,7 @@ class PatchEmbedding(nn.Module):
         if DEBUG: print(f'Linearly projected input: {x.shape} [batch_size, embedding_dim, sqrt(n_patches), sqrt(n_patches)]')

         # Flattening: [batch_size, embedding_dim, n_patches]
-        x = self.flatten(x)
+        x = x.flatten(start_dim=2)
         if DEBUG: print(f'Flattening of last 2 dimensions of linear projection: {x.shape} [batch_size, embedding_dim, n_patches]')

         # Transpose last 2 dimensions: [batch_size, n_patches, embedding_dim]

@@ -43,9 +34,9 @@ class Embedding(nn.Module):

     def __init__(self, image_size: int = 224, in_channels: int = 3, embedding_dim: int = 768, patch_size: int = 16) -> None:

-        super().__init__()
+        super(Embedding, self).__init__()

-        assert …
+        assert image_size % patch_size == 0

         self.n_patches = (image_size * image_size) // (patch_size * patch_size)
         if DEBUG: print(f'Total number of patches: {self.n_patches}, i.e. {int(math.sqrt(self.n_patches))} x {int(math.sqrt(self.n_patches))}')

@@ -79,4 +70,4 @@ if __name__ == '__main__':
     sample_image_batch = torch.rand(5,3,224,224)
     embedding = Embedding()
     out = embedding(sample_image_batch)
-    print(out)
+    print(out.shape)
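
Dropping the `nn.Flatten` module in favour of the tensor method `x.flatten(start_dim=2)` is behaviour-preserving: both collapse the two spatial dimensions of the Conv2d output into a single patch axis. A standalone shape walkthrough of that patch-embedding trick, using the same defaults as the diff (16x16 patches, 768-dim embeddings):

import torch
import torch.nn as nn

conv = nn.Conv2d(3, 768, kernel_size=16, stride=16)  # one projection per 16x16 patch
x = torch.rand(5, 3, 224, 224)

out = conv(x)                   # [5, 768, 14, 14]: 224/16 = 14 patches per side
out = out.flatten(start_dim=2)  # [5, 768, 196]:   14*14 = 196 patches
out = out.transpose(1, 2)       # [5, 196, 768]:   one 768-dim token per patch
print(out.shape)                # torch.Size([5, 196, 768])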
custom_transformer/encoder.py CHANGED
@@ -1,5 +1,6 @@
 import torch
 import torch.nn as nn
+import math

 DEBUG = False

@@ -7,7 +8,7 @@ class MultiHeadSelfAttention(nn.Module):

     def __init__(self, embedding_dim: int = 768, num_heads: int = 12) -> None:

-        super().__init__()
+        super(MultiHeadSelfAttention, self).__init__()

         self.num_heads = num_heads
         self.head_dim = embedding_dim // num_heads

@@ -28,7 +29,7 @@ class MultiHeadSelfAttention(nn.Module):
         q, k, v = q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2)
         if DEBUG: print(f'Swap patches and head to have the head come first: {q.shape} [batch_size, num_heads, n_patches, head_dim]')

-        attention_scores = torch.matmul(q, k.mT) / (self.head_dim …
+        attention_scores = torch.matmul(q, k.mT) / math.sqrt(self.head_dim)
         if DEBUG: print(f'Compute attention scores for each head (scaled dot product): {attention_scores.shape} [batch_size, num_heads, n_patches, n_patches]')

         attention_weights = torch.softmax(attention_scores, dim=-1)

@@ -43,7 +44,6 @@ class MultiHeadSelfAttention(nn.Module):
         weighted_sum = weighted_sum.view(*weighted_sum.shape[:-2], -1)
         if DEBUG: print(f'Recover the original dimensions by merging the last 2: {weighted_sum.shape} [batch_size, n_patches, embedding_dim]')

-
         output = self.out_w(weighted_sum)
         if DEBUG: print(f'(Output) Linear projection of the weighted sum: {output.shape} [batch_size, num_heads, n_patches, embedding_dim]')

@@ -53,7 +53,7 @@ class MultiHeadSelfAttention(nn.Module):
 class MSABlock(nn.Module):

     def __init__(self, embedding_dim: int = 768, num_heads: int = 12) -> None:
-        super().__init__()
+        super(MSABlock, self).__init__()
         self.msa = MultiHeadSelfAttention(embedding_dim=embedding_dim, num_heads=num_heads)
         self.layer_norm = nn.LayerNorm(normalized_shape=embedding_dim)

@@ -64,7 +64,7 @@ class MSABlock(nn.Module):
 class MLPBlock(nn.Module):

     def __init__(self, embedding_dim: int = 768, hidden_size: int = 3072) -> None:
-        super().__init__()
+        super(MLPBlock, self).__init__()
         self.layer_norm = nn.LayerNorm(normalized_shape=embedding_dim)
         self.mlp = nn.Sequential(
             nn.Linear(in_features=embedding_dim, out_features=hidden_size),

@@ -79,7 +79,7 @@ class MLPBlock(nn.Module):
 class TransformerEncoderBlock(nn.Module):

     def __init__(self, embedding_dim: int = 768, hidden_size: int = 3072, num_heads: int = 12) -> None:
-        super().__init__()
+        super(TransformerEncoderBlock, self).__init__()
         self.msa = MSABlock(embedding_dim=embedding_dim, num_heads=num_heads)
         self.mlp = MLPBlock(embedding_dim=embedding_dim, hidden_size=hidden_size)
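
Replacing the truncated exponent expression with `math.sqrt(self.head_dim)` makes the standard attention scaling explicit; for positive d, `d ** 0.5` and `math.sqrt(d)` agree, so this is a readability fix rather than a behaviour change. A self-contained sketch of the scaled dot-product step for a single head, mirroring the shapes in the DEBUG prints:

import math
import torch

head_dim, n_patches = 64, 196           # 768 embedding dim / 12 heads = 64
q = torch.rand(1, n_patches, head_dim)
k = torch.rand(1, n_patches, head_dim)

# Dividing by sqrt(head_dim) keeps the dot products at unit-ish variance,
# so the softmax does not saturate as the head dimension grows.
scores = torch.matmul(q, k.mT) / math.sqrt(head_dim)  # [1, 196, 196]
weights = torch.softmax(scores, dim=-1)               # each row sums to 1
print(weights.sum(dim=-1))                            # ~1.0 everywhere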
custom_transformer/vit.py CHANGED
@@ -1,8 +1,6 @@
 import torch
 import torch.nn as nn

-import sys
-sys.path.append('..')
 from custom_transformer.embedding import Embedding
 from custom_transformer.encoder import TransformerEncoderBlock

@@ -18,7 +16,7 @@ class ViT(nn.Module):
                  num_heads: int = 12,
                  num_classes: int = 3) -> None:

-        super().__init__()
+        super(ViT, self).__init__()

         self.embedding = Embedding(image_size=image_size, in_channels=in_channels, embedding_dim=embedding_dim, patch_size=patch_size)
         self.transformer_encoders = nn.Sequential(
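
With the `sys.path.append('..')` hack removed, `custom_transformer.vit` now resolves its sibling imports only when the package root is on the import path, i.e. when the code runs from the repository root, as app.py does. A short usage sketch under that assumption, using the defaults visible in the diff (num_classes=3):

# run from the repo root, e.g. `python -m custom_transformer.vit`
import torch
from custom_transformer.vit import ViT

model = ViT()                               # 224px images, 16px patches, 3 classes
logits = model(torch.rand(1, 3, 224, 224))  # forward one dummy image
print(logits.shape)                         # expected: torch.Size([1, 3])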
model.py DELETED
@@ -1,49 +0,0 @@
-import lightning as L
-from lightning.pytorch.utilities.model_summary import ModelSummary
-
-import torch
-import torch.nn.functional as F
-import torch.nn as nn
-
-import torchmetrics
-from torchvision import transforms
-
-from typing import Optional
-
-class ClassifierModel(L.LightningModule):
-
-    def __init__(self, model: nn.Module, image_size: int = 500, learning_rate: float = 1e-3, num_classes: int = 3,
-                 train_transform: Optional[transforms.Compose] = None, val_transform: Optional[transforms.Compose] = None) -> None:
-        super().__init__()
-        self.model = model
-        self.learning_rate = learning_rate
-        self.example_input_array = torch.Tensor(5, 3, image_size, image_size)
-        self.f1_score = torchmetrics.F1Score(task='multiclass', num_classes=num_classes)
-        self.train_transform = train_transform
-        self.val_transform = val_transform
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        return self.model(x)
-
-    def print_summary(self) -> None:
-        print(ModelSummary(self, max_depth=-1))
-
-    def configure_optimizers(self) -> torch.optim.Optimizer:
-        return torch.optim.Adam(params=self.model.parameters(), lr=self.learning_rate)
-
-    def training_step(self, batch: tuple, batch_idx: int) -> float:
-        X, y = batch
-        y_pred = self(X)
-        loss = F.cross_entropy(y_pred, y)
-        self.log_dict({'Train loss': loss, f'Train F1 score': self.f1_score(y_pred, y)},
-                      on_step=False, on_epoch=True)
-        return loss
-
-    def validation_step(self, batch: tuple, batch_idx: int) -> float:
-        X, y = batch
-        y_pred = self(X)
-        loss = F.cross_entropy(y_pred, y)
-        self.log_dict({'Validation loss': loss, f'Validation F1 score': self.f1_score(y_pred, y)},
-                      on_step=False, on_epoch=True)
-        return loss
-
models/{my_vit.pth → my_vit.pt} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:59a1ac6330e5dc1daaa83c6f4c46c1177e3543c24c165a269a49f6d02e0611c7
+size 343273238
models/{pretrained_vit.pth → pretrained_vit.pt} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:06ff0b0f58ab0a686c5d3d752b691bb6dd722fa6dcc0e928d8a18328557ee559
+size 343265286
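
The `.pth → .pt` renames only rewrite Git LFS pointer files: the repository tracks a three-line `key value` record (spec version, SHA-256 oid, byte size) while the ~343 MB weight blobs live in LFS storage. A tiny sketch parsing such a pointer (the helper name is hypothetical; the pointer text is the one committed here):

def parse_lfs_pointer(text: str) -> dict:
    """Split a Git LFS pointer file into its key/value fields."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(' ')
        fields[key] = value
    return fields

pointer = (
    'version https://git-lfs.github.com/spec/v1\n'
    'oid sha256:59a1ac6330e5dc1daaa83c6f4c46c1177e3543c24c165a269a49f6d02e0611c7\n'
    'size 343273238'
)
print(parse_lfs_pointer(pointer)['size'])  # 343273238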
transforms.py ADDED
@@ -0,0 +1,12 @@
+from torchvision import transforms, models
+from typing import Literal, Dict
+
+_weights = models.ViT_B_16_Weights.DEFAULT
+
+model_transforms: Dict[Literal['Custom', 'Pretrained'], transforms.Compose] = {
+    'Custom': transforms.Compose([
+        transforms.Resize((224, 224)),
+        transforms.ToTensor()
+    ]),
+    'Pretrained': _weights.transforms()
+}
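
The new `model_transforms` mapping keeps preprocessing out of the model objects: the custom ViT gets a bare resize-and-to-tensor pipeline, while the pretrained entry reuses the transforms bundled with `ViT_B_16_Weights` (resize, center crop, ImageNet normalisation). A minimal usage sketch (the image path is a placeholder):

from PIL import Image
from transforms import model_transforms

img = Image.open('example.jpg').convert('RGB')  # placeholder path

# Both pipelines return [3, 224, 224] tensors, but only 'Pretrained'
# applies the ImageNet mean/std normalisation its weights expect.
custom_input = model_transforms['Custom'](img).unsqueeze(0)
pretrained_input = model_transforms['Pretrained'](img).unsqueeze(0)
print(custom_input.shape, pretrained_input.shape)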