Upload 8 files
Browse files- GreggRecognition-main/LICENSE +21 -0
- GreggRecognition-main/src/config.py +9 -0
- GreggRecognition-main/src/dataloader.py +113 -0
- GreggRecognition-main/src/env_tests.py +3 -0
- GreggRecognition-main/src/main.py +45 -0
- GreggRecognition-main/src/model.py +66 -0
- GreggRecognition-main/src/requirements.txt +7 -0
- GreggRecognition-main/src/utils/datafixerupper.py +30 -0
GreggRecognition-main/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2024 aw
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
GreggRecognition-main/src/config.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
class CONFIG:
    """Hyper-parameters and paths shared across the project."""

    # NOTE(review): the dataset vocabulary in dataloader.py has only 28
    # symbols ('a'-'z', '+', '#'); 10000 greatly over-allocates the
    # embedding and output layers — confirm before shrinking.
    vocabulary_size = 10000
    embedding_size = 256  # character-embedding width
    RNN_size = 512        # GRU hidden-state width
    drop_out = 0.5        # dropout probability before the output layer
    # Data lives next to this file in a 'data' subfolder.
    data_folder = os.path.join(os.path.dirname(__file__), 'data')
    # BUG FIX: was the string '0.1'; store the fraction as a float so numeric
    # consumers don't have to convert. Existing `float(val_proportion)` call
    # sites keep working (float(0.1) is a no-op).
    val_proportion = 0.1
|
GreggRecognition-main/src/dataloader.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch
|
| 4 |
+
from torch.utils.data import Dataset
|
| 5 |
+
from PIL import Image, ImageOps
|
| 6 |
+
from config import CONFIG
|
| 7 |
+
|
| 8 |
+
def rgb2grey(rgb):
    """Collapse an RGB(A) image array to greyscale using ITU-R BT.601 luma
    weights; any alpha channel is ignored. Output drops the channel axis."""
    red, green, blue = rgb[..., 0], rgb[..., 1], rgb[..., 2]
    return red * 0.299 + green * 0.587 + blue * 0.114
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
import numpy as np
|
| 13 |
+
from config import CONFIG
|
| 14 |
+
|
| 15 |
+
def data_split():
    """Partition the files in the configured data folder into train/val/test.

    Every `period`-th file (period = round(1 / val_proportion)) goes to the
    validation set, the file right after it to the test set, and everything
    else to the training set.

    Returns:
        (train_files, val_files, test_files, max_H, max_W, max_seq_length)
    """
    config = CONFIG()
    folder = config.data_folder
    val_proportion = float(config.val_proportion)  # accepts str or float config

    # Sort for a deterministic split: os.listdir order is filesystem-dependent.
    files = sorted(os.listdir(folder))
    period = int(np.round(1 / val_proportion))

    val_files = files[::period]
    test_files = files[1::period]
    # BUG FIX: the original set train_files = files[::period] — i.e. only one
    # file in every `period` (identical stride to val), discarding the bulk of
    # the data. Train on everything not assigned to val or test.
    train_files = [f for i, f in enumerate(files) if i % period > 1]

    # NOTE(review): placeholder dimensions — TODO derive from the actual
    # images and labels instead of hard-coding.
    max_H, max_W, max_seq_length = 256, 256, 100

    return train_files, val_files, test_files, max_H, max_W, max_seq_length
|
| 30 |
+
|
| 31 |
+
def augmentation_simple(filename, aug_type, max_H, max_W, folder=CONFIG.data_folder):
    """Paste the greyscale glyph image onto a white max_H x max_W canvas at an
    offset chosen by `aug_type` (up to 2 vertical x 3 horizontal placements).

    Returns a float ndarray of shape (max_H, max_W) with background value 1.0.
    """
    # BUG FIX: the original called mpimg.imread, but matplotlib.image is never
    # imported in this module, so every call raised NameError. Load via PIL
    # (already imported here) and scale to [0, 1] to match imread's float
    # output for PNG files.
    with Image.open(os.path.join(folder, filename)) as img:
        image = rgb2grey(np.array(img.convert('RGB')) / 255)
    image_augmented = np.ones((max_H, max_W))
    h, w = np.shape(image)
    # Free slack on each axis; aug_type selects which multiple of it to use.
    stride_0, stride_1 = max_H - h, (max_W - w) // 2
    offset = ((aug_type % 2) * stride_0, (aug_type % 3) * stride_1)
    image_augmented[offset[0]: h + offset[0], offset[1]: w + offset[1]] = image
    return image_augmented
|
| 39 |
+
|
| 40 |
+
def augmentation_nine(filename, aug_type, max_H, max_W, folder=CONFIG.data_folder):
    """Produce one of nine augmentation variants (3 rotations x 3 scales) of
    the named glyph image, pasted onto a white max_H x max_W canvas.

    aug_type in [0, 8]: aug_type % 3 selects rotation (none / +2deg / -2deg)
    and aug_type // 3 selects scale (1.0 / 0.98 / 0.96).
    Returns a float ndarray of shape (max_H, max_W).
    """
    image_augmented = np.ones((max_H, max_W))
    image = Image.open(os.path.join(folder, filename)).convert('RGB')
    # PIL's .size is (width, height).
    w_ori, h_ori = image.size
    rotate_ind = aug_type % 3
    scale_ind = aug_type // 3

    # Invert before rotating so the corners `expand=True` fills (black by
    # default) become white background after the second inversion.
    image = ImageOps.invert(image)
    if rotate_ind == 1:
        image = image.rotate(2, expand=True)
    elif rotate_ind == 2:
        image = image.rotate(-2, expand=True)
    image = ImageOps.invert(image)

    # NOTE(review): .size is (width, height), so `h` here is actually the
    # width and `w` the height; the subsequent resize((h, w)) therefore passes
    # (width, height) in PIL's expected order, but the names are swapped —
    # confirm before relying on h/w individually.
    h, w = image.size
    if scale_ind == 1:
        h, w = int(np.floor(h * 0.98)), int(np.floor(w * 0.98))
        image = image.resize((h, w))
    elif scale_ind == 2:
        h, w = int(np.floor(h * 0.96)), int(np.floor(w * 0.96))
        image = image.resize((h, w))

    # Scale to [0, 1] and collapse to greyscale; np.shape gives (rows, cols).
    image = rgb2grey(np.array(image) / 255)
    h, w = np.shape(image)
    # Paste offset derived from the ORIGINAL dimensions with a 10px margin.
    # NOTE(review): both offset components use aug_type % 3 (the rotation
    # index), so placement varies with rotation only, not scale — confirm
    # whether that is intentional.
    stride_0, stride_1 = (max_H - 10 - h_ori) // 2, (max_W - 10 - w_ori) // 2
    offset = ((aug_type % 3) * stride_0, (aug_type % 3) * stride_1)
    try:
        image_augmented[offset[0]: h + offset[0], offset[1]: w + offset[1]] = image
    except ValueError:
        # Best-effort: if the (possibly rotated/oversized) image does not fit
        # the canvas, log the file name and return the blank canvas.
        print(filename)
    return image_augmented
|
| 71 |
+
|
| 72 |
+
class ShorthandGenerationDataset(Dataset):
    """Dataset of (image, context, next-char) instances for shorthand words.

    Each word image (the file name encodes the label, e.g. 'cat.png') is
    expanded into one instance per context length and per augmentation
    variant: the model receives the augmented image plus the first `length`
    characters of the sentinel-wrapped label '+<word>#', and must predict the
    character that follows.
    """

    def __init__(self, file_list, max_H, max_W, max_label_leng, aug_types, channels=1):
        self.file_list = file_list
        self.H, self.W = max_H, max_W
        self.channels = channels
        # '+' and '#' act as start / end sentinels.
        self.vocabulary = 'abcdefghijklmnopqrstuvwxyz+#'
        self.dict_c2i = {char: idx for idx, char in enumerate(self.vocabulary)}
        self.max_label_length = max_label_leng
        self.max_context_length = self.max_label_length - 1
        self.aug_types = aug_types
        # Instances bucketed by context length; each entry is [seq, aug, length].
        # NOTE(review): a file whose label exceeds max_label_leng - 2 chars
        # raises KeyError here — confirm inputs are pre-filtered.
        self.instance_indices_by_length = {i: [] for i in range(1, self.max_context_length + 1)}

        for file in file_list:
            seq = '+' + file[:-4] + '#'  # strip '.png', wrap in sentinels
            max_context_len = len(seq) - 1
            for length in range(1, max_context_len + 1):
                for aug in range(self.aug_types):
                    self.instance_indices_by_length[length].append([seq, aug, length])

        # BUG FIX: the original summed over range(1, max_context_length),
        # silently dropping the last bucket, so __len__ under-reported the
        # dataset size whenever a word reached the maximum context length.
        self.total_size = sum(
            len(self.instance_indices_by_length[i])
            for i in range(1, self.max_context_length + 1)
        )

    def __len__(self):
        """Total number of (image, context, target) instances."""
        return self.total_size

    def __getitem__(self, idx):
        """Map a flat index to an instance; buckets are ordered by length."""
        # Find the context-length bucket containing `idx`.
        context_length = 1
        while sum(len(self.instance_indices_by_length[length]) for length in range(1, context_length + 1)) <= idx:
            context_length += 1

        # Offset of `idx` inside its bucket.
        num_batch_in_length = idx - sum(len(self.instance_indices_by_length[length]) for length in range(1, context_length))
        starting_index = num_batch_in_length

        seq, augmentation_type, instance_context_length = self.instance_indices_by_length[context_length][starting_index]

        # Reconstruct the image file name from the sentinel-wrapped label.
        file_name = seq[1:-1] + '.png'
        img = augmentation_nine(file_name, augmentation_type, self.H, self.W)
        img = np.expand_dims(img, axis=0)  # add channel dimension

        x_context = np.array([self.dict_c2i[char] for char in seq[:instance_context_length]])
        y = self.dict_c2i[seq[instance_context_length]]

        return torch.tensor(img, dtype=torch.float32), torch.tensor(x_context, dtype=torch.long), torch.tensor(y, dtype=torch.long)
|
GreggRecognition-main/src/env_tests.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch

# Minimal environment sanity check: importing torch proves the install works,
# and the flag reports whether a CUDA-capable GPU is visible to this process.
cuda_is_available = torch.cuda.is_available()
print("CUDA available:", cuda_is_available)
|
GreggRecognition-main/src/main.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch.utils.data import DataLoader
|
| 3 |
+
from torch.nn.utils.rnn import pad_sequence
|
| 4 |
+
from model import Model
|
| 5 |
+
from dataloader import ShorthandGenerationDataset, data_split
|
| 6 |
+
from config import CONFIG
|
| 7 |
+
from tqdm import tqdm # Import tqdm for progress bar
|
| 8 |
+
|
| 9 |
+
def collate_fn(batch):
    """Collate (image, context, target) triples into padded batch tensors.

    Images and context sequences are zero-padded to the longest item in the
    batch; the scalar targets are stacked into a 1-D tensor.
    """
    images, contexts, targets = zip(*batch)
    padded_images = pad_sequence(images, batch_first=True, padding_value=0)
    padded_contexts = pad_sequence(contexts, batch_first=True, padding_value=0)
    stacked_targets = torch.stack(targets)
    return padded_images, padded_contexts, stacked_targets
|
| 16 |
+
|
| 17 |
+
# Split the data into train / validation / test file lists; max_H / max_W /
# max_seq_length describe the padded image canvas and maximum label length.
train_files, val_files, test_files, max_H, max_W, max_seq_length = data_split()

# Initialize datasets: training uses all 9 augmentation variants, while
# validation and test use a single variant each.
train_dataset = ShorthandGenerationDataset(train_files, max_H, max_W, aug_types=9, max_label_leng=max_seq_length, channels=1)
val_dataset = ShorthandGenerationDataset(val_files, max_H, max_W, aug_types=1, max_label_leng=max_seq_length, channels=1)
test_dataset = ShorthandGenerationDataset(test_files, max_H, max_W, aug_types=1, max_label_leng=max_seq_length, channels=1)

# Only the training loader shuffles; collate_fn pads variable-length contexts.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Initialize the model with the canvas size and shared hyper-parameters.
config = CONFIG()
model = Model(max_H, max_W, config)

# Skeleton training loop.
# NOTE(review): the loop bodies are stubs — no loss, optimizer, backward pass
# or metric computation is implemented yet; only the data pipeline is exercised.
num_epochs = 10  # Define the number of epochs
for epoch in range(num_epochs):
    model.train()
    for imgs, labels, additional in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
        # Training step (TODO: forward, loss, backward, optimizer step)
        pass

    model.eval()
    with torch.no_grad():
        for imgs, labels, additional in tqdm(val_loader, desc=f"Validation Epoch {epoch+1}/{num_epochs}"):
            # Validation step (TODO: forward, metrics)
            pass
|
GreggRecognition-main/src/model.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
class FeatureExtractor(nn.Module):
|
| 6 |
+
def __init__(self):
|
| 7 |
+
super(FeatureExtractor, self).__init__()
|
| 8 |
+
self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
|
| 9 |
+
self.bn1 = nn.BatchNorm2d(32)
|
| 10 |
+
self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
|
| 11 |
+
self.bn2 = nn.BatchNorm2d(32)
|
| 12 |
+
self.pool = nn.MaxPool2d(kernel_size=2)
|
| 13 |
+
|
| 14 |
+
self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
|
| 15 |
+
self.bn3 = nn.BatchNorm2d(64)
|
| 16 |
+
self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
|
| 17 |
+
self.bn4 = nn.BatchNorm2d(64)
|
| 18 |
+
|
| 19 |
+
self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
|
| 20 |
+
self.bn5 = nn.BatchNorm2d(128)
|
| 21 |
+
self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
|
| 22 |
+
self.bn6 = nn.BatchNorm2d(128)
|
| 23 |
+
|
| 24 |
+
self.conv7 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
|
| 25 |
+
self.bn7 = nn.BatchNorm2d(256)
|
| 26 |
+
self.conv8 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
|
| 27 |
+
self.bn8 = nn.BatchNorm2d(256)
|
| 28 |
+
|
| 29 |
+
def forward(self, x):
|
| 30 |
+
x = F.relu(self.bn1(self.conv1(x)))
|
| 31 |
+
x = F.relu(self.bn2(self.conv2(x)))
|
| 32 |
+
x = self.pool(x)
|
| 33 |
+
|
| 34 |
+
x = F.relu(self.bn3(self.conv3(x)))
|
| 35 |
+
x = F.relu(self.bn4(self.conv4(x)))
|
| 36 |
+
x = self.pool(x)
|
| 37 |
+
|
| 38 |
+
x = F.relu(self.bn5(self.conv5(x)))
|
| 39 |
+
x = F.relu(self.bn6(self.conv6(x)))
|
| 40 |
+
x = self.pool(x)
|
| 41 |
+
|
| 42 |
+
x = F.relu(self.bn7(self.conv7(x)))
|
| 43 |
+
x = F.relu(self.bn8(self.conv8(x)))
|
| 44 |
+
return x
|
| 45 |
+
|
| 46 |
+
class Model(nn.Module):
    """Image-conditioned GRU character predictor.

    The CNN feature vector of the glyph image seeds the GRU's initial hidden
    state; the GRU then consumes the embedded character context and a linear
    head emits a probability distribution over the vocabulary at each step.
    """

    def __init__(self, H, W, config):
        super(Model, self).__init__()
        self.feature_extractor = FeatureExtractor()
        self.flatten = nn.Flatten()
        # The extractor's three pooling stages shrink H and W by a factor of 8.
        self.fc = nn.Linear(256 * (H // 8) * (W // 8), config.RNN_size)
        self.embedding = nn.Embedding(config.vocabulary_size, config.embedding_size)
        self.gru = nn.GRU(config.embedding_size, config.RNN_size, batch_first=True)
        self.dropout = nn.Dropout(config.drop_out)
        self.fc_out = nn.Linear(config.RNN_size, config.vocabulary_size)

    def forward(self, img, x_context):
        """Return per-step vocabulary probabilities, shape (batch, seq, vocab)."""
        visual = self.flatten(self.feature_extractor(img))
        visual = F.relu(self.fc(visual))

        embedded = self.embedding(x_context)
        # unsqueeze to (1, batch, RNN_size): image vector as initial GRU state.
        rnn_out, _ = self.gru(embedded, visual.unsqueeze(0))
        # NOTE(review): the output is already softmax-ed — downstream loss must
        # be NLL on log-probabilities; pairing with nn.CrossEntropyLoss would
        # apply softmax twice. Confirm against the (stubbed) training loop.
        return F.softmax(self.fc_out(self.dropout(rnn_out)), dim=-1)
|
GreggRecognition-main/src/requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
torchvision
|
| 3 |
+
numpy
|
| 4 |
+
pandas
|
| 5 |
+
scikit-learn
|
| 6 |
+
matplotlib
|
| 7 |
+
tqdm
|
GreggRecognition-main/src/utils/datafixerupper.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#
|
| 2 |
+
# Takes gregg-1916 dataset with labels in filenames and converts to CSV-labeled data
|
| 3 |
+
#
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import csv
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
# Source folder of label-in-filename images and destination for the CSV copy.
# NOTE(review): relative paths — assumes the script is run from src/utils.
data_folder = '../data'
output_folder = '../data-labeled'
|
| 11 |
+
|
| 12 |
+
def generate_csv_from_data_folder(data_folder, output_folder, csv_filename='labels.csv'):
    """Write a `filename,label` CSV describing every file in *data_folder*.

    The label is the file's stem — the gregg-1916 dataset encodes each word in
    its file name. The CSV is written to *output_folder*/*csv_filename*,
    creating the output folder if needed.
    """
    data_folder = Path(data_folder)
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    csv_path = output_folder / csv_filename

    # newline='' is required by the csv module; explicit utf-8 keeps the
    # output portable across platforms.
    with open(csv_path, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['filename', 'label'])

        # sorted() makes the CSV deterministic: iterdir order depends on the
        # filesystem and would otherwise vary between runs/machines.
        for file in sorted(data_folder.iterdir()):
            if file.is_file():
                label = file.stem  # filename without the extension == the label
                writer.writerow([file.name, label])

    print(f"CSV file generated at: {csv_path}")
|
| 29 |
+
|
| 30 |
+
# Runs on import as well as direct execution (no __main__ guard in original).
generate_csv_from_data_folder(data_folder, output_folder)
|