Upload 8 files
Browse files- GreggRecognition-main/LICENSE +21 -0
- GreggRecognition-main/src/config.py +9 -0
- GreggRecognition-main/src/dataloader.py +113 -0
- GreggRecognition-main/src/env_tests.py +3 -0
- GreggRecognition-main/src/main.py +45 -0
- GreggRecognition-main/src/model.py +66 -0
- GreggRecognition-main/src/requirements.txt +7 -0
- GreggRecognition-main/src/utils/datafixerupper.py +30 -0
GreggRecognition-main/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2024 aw
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
GreggRecognition-main/src/config.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
class CONFIG:
    """Hyper-parameters and paths shared across the project."""

    # NOTE(review): the dataset vocabulary in dataloader.py has only 28
    # symbols ('a'-'z', '+', '#'); 10000 greatly over-allocates the
    # embedding and output layers — confirm before shrinking.
    vocabulary_size = 10000
    embedding_size = 256  # character-embedding width
    RNN_size = 512        # GRU hidden-state width
    drop_out = 0.5        # dropout probability before the output layer
    # Data lives next to this file in a 'data' subfolder.
    data_folder = os.path.join(os.path.dirname(__file__), 'data')
    # BUG FIX: was the string '0.1'; store the fraction as a float so numeric
    # consumers don't have to convert. Existing `float(val_proportion)` call
    # sites keep working (float(0.1) is a no-op).
    val_proportion = 0.1
|
GreggRecognition-main/src/dataloader.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch
|
| 4 |
+
from torch.utils.data import Dataset
|
| 5 |
+
from PIL import Image, ImageOps
|
| 6 |
+
from config import CONFIG
|
| 7 |
+
|
| 8 |
+
def rgb2grey(rgb):
    """Collapse an RGB(A) image array to greyscale using ITU-R BT.601 luma
    weights; any alpha channel is ignored. Output drops the channel axis."""
    red, green, blue = rgb[..., 0], rgb[..., 1], rgb[..., 2]
    return red * 0.299 + green * 0.587 + blue * 0.114
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
import numpy as np
|
| 13 |
+
from config import CONFIG
|
| 14 |
+
|
| 15 |
+
def data_split():
    """Partition the files in the configured data folder into train/val/test.

    Every `period`-th file (period = round(1 / val_proportion)) goes to the
    validation set, the file right after it to the test set, and everything
    else to the training set.

    Returns:
        (train_files, val_files, test_files, max_H, max_W, max_seq_length)
    """
    config = CONFIG()
    folder = config.data_folder
    val_proportion = float(config.val_proportion)  # accepts str or float config

    # Sort for a deterministic split: os.listdir order is filesystem-dependent.
    files = sorted(os.listdir(folder))
    period = int(np.round(1 / val_proportion))

    val_files = files[::period]
    test_files = files[1::period]
    # BUG FIX: the original set train_files = files[::period] — i.e. only one
    # file in every `period` (identical stride to val), discarding the bulk of
    # the data. Train on everything not assigned to val or test.
    train_files = [f for i, f in enumerate(files) if i % period > 1]

    # NOTE(review): placeholder dimensions — TODO derive from the actual
    # images and labels instead of hard-coding.
    max_H, max_W, max_seq_length = 256, 256, 100

    return train_files, val_files, test_files, max_H, max_W, max_seq_length
|
| 30 |
+
|
| 31 |
+
def augmentation_simple(filename, aug_type, max_H, max_W, folder=CONFIG.data_folder):
    """Paste the greyscale glyph image onto a white max_H x max_W canvas at an
    offset chosen by `aug_type` (up to 2 vertical x 3 horizontal placements).

    Returns a float ndarray of shape (max_H, max_W) with background value 1.0.
    """
    # BUG FIX: the original called mpimg.imread, but matplotlib.image is never
    # imported in this module, so every call raised NameError. Load via PIL
    # (already imported here) and scale to [0, 1] to match imread's float
    # output for PNG files.
    with Image.open(os.path.join(folder, filename)) as img:
        image = rgb2grey(np.array(img.convert('RGB')) / 255)
    image_augmented = np.ones((max_H, max_W))
    h, w = np.shape(image)
    # Free slack on each axis; aug_type selects which multiple of it to use.
    stride_0, stride_1 = max_H - h, (max_W - w) // 2
    offset = ((aug_type % 2) * stride_0, (aug_type % 3) * stride_1)
    image_augmented[offset[0]: h + offset[0], offset[1]: w + offset[1]] = image
    return image_augmented
|
| 39 |
+
|
| 40 |
+
def augmentation_nine(filename, aug_type, max_H, max_W, folder=CONFIG.data_folder):
    """Produce one of nine augmentation variants (3 rotations x 3 scales) of
    the named glyph image, pasted onto a white max_H x max_W canvas.

    aug_type in [0, 8]: aug_type % 3 selects rotation (none / +2deg / -2deg)
    and aug_type // 3 selects scale (1.0 / 0.98 / 0.96).
    Returns a float ndarray of shape (max_H, max_W).
    """
    image_augmented = np.ones((max_H, max_W))
    image = Image.open(os.path.join(folder, filename)).convert('RGB')
    # PIL's .size is (width, height).
    w_ori, h_ori = image.size
    rotate_ind = aug_type % 3
    scale_ind = aug_type // 3

    # Invert before rotating so the corners `expand=True` fills (black by
    # default) become white background after the second inversion.
    image = ImageOps.invert(image)
    if rotate_ind == 1:
        image = image.rotate(2, expand=True)
    elif rotate_ind == 2:
        image = image.rotate(-2, expand=True)
    image = ImageOps.invert(image)

    # NOTE(review): .size is (width, height), so `h` here is actually the
    # width and `w` the height; the subsequent resize((h, w)) therefore passes
    # (width, height) in PIL's expected order, but the names are swapped —
    # confirm before relying on h/w individually.
    h, w = image.size
    if scale_ind == 1:
        h, w = int(np.floor(h * 0.98)), int(np.floor(w * 0.98))
        image = image.resize((h, w))
    elif scale_ind == 2:
        h, w = int(np.floor(h * 0.96)), int(np.floor(w * 0.96))
        image = image.resize((h, w))

    # Scale to [0, 1] and collapse to greyscale; np.shape gives (rows, cols).
    image = rgb2grey(np.array(image) / 255)
    h, w = np.shape(image)
    # Paste offset derived from the ORIGINAL dimensions with a 10px margin.
    # NOTE(review): both offset components use aug_type % 3 (the rotation
    # index), so placement varies with rotation only, not scale — confirm
    # whether that is intentional.
    stride_0, stride_1 = (max_H - 10 - h_ori) // 2, (max_W - 10 - w_ori) // 2
    offset = ((aug_type % 3) * stride_0, (aug_type % 3) * stride_1)
    try:
        image_augmented[offset[0]: h + offset[0], offset[1]: w + offset[1]] = image
    except ValueError:
        # Best-effort: if the (possibly rotated/oversized) image does not fit
        # the canvas, log the file name and return the blank canvas.
        print(filename)
    return image_augmented
|
| 71 |
+
|
| 72 |
+
class ShorthandGenerationDataset(Dataset):
    """Dataset of (image, context, next-char) instances for shorthand words.

    Each word image (the file name encodes the label, e.g. 'cat.png') is
    expanded into one instance per context length and per augmentation
    variant: the model receives the augmented image plus the first `length`
    characters of the sentinel-wrapped label '+<word>#', and must predict the
    character that follows.
    """

    def __init__(self, file_list, max_H, max_W, max_label_leng, aug_types, channels=1):
        self.file_list = file_list
        self.H, self.W = max_H, max_W
        self.channels = channels
        # '+' and '#' act as start / end sentinels.
        self.vocabulary = 'abcdefghijklmnopqrstuvwxyz+#'
        self.dict_c2i = {char: idx for idx, char in enumerate(self.vocabulary)}
        self.max_label_length = max_label_leng
        self.max_context_length = self.max_label_length - 1
        self.aug_types = aug_types
        # Instances bucketed by context length; each entry is [seq, aug, length].
        # NOTE(review): a file whose label exceeds max_label_leng - 2 chars
        # raises KeyError here — confirm inputs are pre-filtered.
        self.instance_indices_by_length = {i: [] for i in range(1, self.max_context_length + 1)}

        for file in file_list:
            seq = '+' + file[:-4] + '#'  # strip '.png', wrap in sentinels
            max_context_len = len(seq) - 1
            for length in range(1, max_context_len + 1):
                for aug in range(self.aug_types):
                    self.instance_indices_by_length[length].append([seq, aug, length])

        # BUG FIX: the original summed over range(1, max_context_length),
        # silently dropping the last bucket, so __len__ under-reported the
        # dataset size whenever a word reached the maximum context length.
        self.total_size = sum(
            len(self.instance_indices_by_length[i])
            for i in range(1, self.max_context_length + 1)
        )

    def __len__(self):
        """Total number of (image, context, target) instances."""
        return self.total_size

    def __getitem__(self, idx):
        """Map a flat index to an instance; buckets are ordered by length."""
        # Find the context-length bucket containing `idx`.
        context_length = 1
        while sum(len(self.instance_indices_by_length[length]) for length in range(1, context_length + 1)) <= idx:
            context_length += 1

        # Offset of `idx` inside its bucket.
        num_batch_in_length = idx - sum(len(self.instance_indices_by_length[length]) for length in range(1, context_length))
        starting_index = num_batch_in_length

        seq, augmentation_type, instance_context_length = self.instance_indices_by_length[context_length][starting_index]

        # Reconstruct the image file name from the sentinel-wrapped label.
        file_name = seq[1:-1] + '.png'
        img = augmentation_nine(file_name, augmentation_type, self.H, self.W)
        img = np.expand_dims(img, axis=0)  # add channel dimension

        x_context = np.array([self.dict_c2i[char] for char in seq[:instance_context_length]])
        y = self.dict_c2i[seq[instance_context_length]]

        return torch.tensor(img, dtype=torch.float32), torch.tensor(x_context, dtype=torch.long), torch.tensor(y, dtype=torch.long)
|
GreggRecognition-main/src/env_tests.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch

# Minimal environment sanity check: importing torch proves the install works,
# and the flag reports whether a CUDA-capable GPU is visible to this process.
cuda_is_available = torch.cuda.is_available()
print("CUDA available:", cuda_is_available)
|
GreggRecognition-main/src/main.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch.utils.data import DataLoader
|
| 3 |
+
from torch.nn.utils.rnn import pad_sequence
|
| 4 |
+
from model import Model
|
| 5 |
+
from dataloader import ShorthandGenerationDataset, data_split
|
| 6 |
+
from config import CONFIG
|
| 7 |
+
from tqdm import tqdm # Import tqdm for progress bar
|
| 8 |
+
|
| 9 |
+
def collate_fn(batch):
    """Collate (image, context, target) triples into padded batch tensors.

    Images and context sequences are zero-padded to the longest item in the
    batch; the scalar targets are stacked into a 1-D tensor.
    """
    images, contexts, targets = zip(*batch)
    padded_images = pad_sequence(images, batch_first=True, padding_value=0)
    padded_contexts = pad_sequence(contexts, batch_first=True, padding_value=0)
    stacked_targets = torch.stack(targets)
    return padded_images, padded_contexts, stacked_targets
|
| 16 |
+
|
| 17 |
+
# Split the data into train / validation / test file lists; max_H / max_W /
# max_seq_length describe the padded image canvas and maximum label length.
train_files, val_files, test_files, max_H, max_W, max_seq_length = data_split()

# Initialize datasets: training uses all 9 augmentation variants, while
# validation and test use a single variant each.
train_dataset = ShorthandGenerationDataset(train_files, max_H, max_W, aug_types=9, max_label_leng=max_seq_length, channels=1)
val_dataset = ShorthandGenerationDataset(val_files, max_H, max_W, aug_types=1, max_label_leng=max_seq_length, channels=1)
test_dataset = ShorthandGenerationDataset(test_files, max_H, max_W, aug_types=1, max_label_leng=max_seq_length, channels=1)

# Only the training loader shuffles; collate_fn pads variable-length contexts.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Initialize the model with the canvas size and shared hyper-parameters.
config = CONFIG()
model = Model(max_H, max_W, config)

# Skeleton training loop.
# NOTE(review): the loop bodies are stubs — no loss, optimizer, backward pass
# or metric computation is implemented yet; only the data pipeline is exercised.
num_epochs = 10  # Define the number of epochs
for epoch in range(num_epochs):
    model.train()
    for imgs, labels, additional in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
        # Training step (TODO: forward, loss, backward, optimizer step)
        pass

    model.eval()
    with torch.no_grad():
        for imgs, labels, additional in tqdm(val_loader, desc=f"Validation Epoch {epoch+1}/{num_epochs}"):
            # Validation step (TODO: forward, metrics)
            pass
|
GreggRecognition-main/src/model.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
class FeatureExtractor(nn.Module):
|
| 6 |
+
def __init__(self):
|
| 7 |
+
super(FeatureExtractor, self).__init__()
|
| 8 |
+
self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
|
| 9 |
+
self.bn1 = nn.BatchNorm2d(32)
|
| 10 |
+
self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
|
| 11 |
+
self.bn2 = nn.BatchNorm2d(32)
|
| 12 |
+
self.pool = nn.MaxPool2d(kernel_size=2)
|
| 13 |
+
|
| 14 |
+
self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
|
| 15 |
+
self.bn3 = nn.BatchNorm2d(64)
|
| 16 |
+
self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
|
| 17 |
+
self.bn4 = nn.BatchNorm2d(64)
|
| 18 |
+
|
| 19 |
+
self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
|
| 20 |
+
self.bn5 = nn.BatchNorm2d(128)
|
| 21 |
+
self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
|
| 22 |
+
self.bn6 = nn.BatchNorm2d(128)
|
| 23 |
+
|
| 24 |
+
self.conv7 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
|
| 25 |
+
self.bn7 = nn.BatchNorm2d(256)
|
| 26 |
+
self.conv8 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
|
| 27 |
+
self.bn8 = nn.BatchNorm2d(256)
|
| 28 |
+
|
| 29 |
+
def forward(self, x):
|
| 30 |
+
x = F.relu(self.bn1(self.conv1(x)))
|
| 31 |
+
x = F.relu(self.bn2(self.conv2(x)))
|
| 32 |
+
x = self.pool(x)
|
| 33 |
+
|
| 34 |
+
x = F.relu(self.bn3(self.conv3(x)))
|
| 35 |
+
x = F.relu(self.bn4(self.conv4(x)))
|
| 36 |
+
x = self.pool(x)
|
| 37 |
+
|
| 38 |
+
x = F.relu(self.bn5(self.conv5(x)))
|
| 39 |
+
x = F.relu(self.bn6(self.conv6(x)))
|
| 40 |
+
x = self.pool(x)
|
| 41 |
+
|
| 42 |
+
x = F.relu(self.bn7(self.conv7(x)))
|
| 43 |
+
x = F.relu(self.bn8(self.conv8(x)))
|
| 44 |
+
return x
|
| 45 |
+
|
| 46 |
+
class Model(nn.Module):
    """Image-conditioned GRU character predictor.

    The CNN feature vector of the glyph image seeds the GRU's initial hidden
    state; the GRU then consumes the embedded character context and a linear
    head emits a probability distribution over the vocabulary at each step.
    """

    def __init__(self, H, W, config):
        super(Model, self).__init__()
        self.feature_extractor = FeatureExtractor()
        self.flatten = nn.Flatten()
        # The extractor's three pooling stages shrink H and W by a factor of 8.
        self.fc = nn.Linear(256 * (H // 8) * (W // 8), config.RNN_size)
        self.embedding = nn.Embedding(config.vocabulary_size, config.embedding_size)
        self.gru = nn.GRU(config.embedding_size, config.RNN_size, batch_first=True)
        self.dropout = nn.Dropout(config.drop_out)
        self.fc_out = nn.Linear(config.RNN_size, config.vocabulary_size)

    def forward(self, img, x_context):
        """Return per-step vocabulary probabilities, shape (batch, seq, vocab)."""
        visual = self.flatten(self.feature_extractor(img))
        visual = F.relu(self.fc(visual))

        embedded = self.embedding(x_context)
        # unsqueeze to (1, batch, RNN_size): image vector as initial GRU state.
        rnn_out, _ = self.gru(embedded, visual.unsqueeze(0))
        # NOTE(review): the output is already softmax-ed — downstream loss must
        # be NLL on log-probabilities; pairing with nn.CrossEntropyLoss would
        # apply softmax twice. Confirm against the (stubbed) training loop.
        return F.softmax(self.fc_out(self.dropout(rnn_out)), dim=-1)
|
GreggRecognition-main/src/requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
torchvision
|
| 3 |
+
numpy
|
| 4 |
+
pandas
|
| 5 |
+
scikit-learn
|
| 6 |
+
matplotlib
|
| 7 |
+
tqdm
|
GreggRecognition-main/src/utils/datafixerupper.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#
|
| 2 |
+
# Takes gregg-1916 dataset with labels in filenames and converts to CSV-labeled data
|
| 3 |
+
#
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import csv
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
# Source folder of label-in-filename images and destination for the CSV copy.
# NOTE(review): relative paths — assumes the script is run from src/utils.
data_folder = '../data'
output_folder = '../data-labeled'
|
| 11 |
+
|
| 12 |
+
def generate_csv_from_data_folder(data_folder, output_folder, csv_filename='labels.csv'):
    """Write a `filename,label` CSV describing every file in *data_folder*.

    The label is the file's stem — the gregg-1916 dataset encodes each word in
    its file name. The CSV is written to *output_folder*/*csv_filename*,
    creating the output folder if needed.
    """
    data_folder = Path(data_folder)
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    csv_path = output_folder / csv_filename

    # newline='' is required by the csv module; explicit utf-8 keeps the
    # output portable across platforms.
    with open(csv_path, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['filename', 'label'])

        # sorted() makes the CSV deterministic: iterdir order depends on the
        # filesystem and would otherwise vary between runs/machines.
        for file in sorted(data_folder.iterdir()):
            if file.is_file():
                label = file.stem  # filename without the extension == the label
                writer.writerow([file.name, label])

    print(f"CSV file generated at: {csv_path}")
|
| 29 |
+
|
| 30 |
+
# Runs on import as well as direct execution (no __main__ guard in original).
generate_csv_from_data_folder(data_folder, output_folder)
|