a0a7 commited on
Commit
3663dd0
·
verified ·
1 Parent(s): 717366f

Upload 8 files

Browse files
GreggRecognition-main/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 aw
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
GreggRecognition-main/src/config.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
class CONFIG:
    """Model and data-pipeline hyper-parameters.

    Attributes are class-level constants; callers use the class directly
    (``CONFIG.data_folder``) or a ``CONFIG()`` instance interchangeably.
    """
    vocabulary_size = 10000   # NOTE(review): dataset vocabulary is only 28 chars — confirm this oversizing is intended
    embedding_size = 256      # character-embedding width
    RNN_size = 512            # GRU hidden-state size
    drop_out = 0.5            # dropout probability
    data_folder = os.path.join(os.path.dirname(__file__), 'data')  # <src>/data
    # BUG FIX: was the string '0.1'; store a float so numeric use works
    # directly (existing callers that do float(val_proportion) still work).
    val_proportion = 0.1
GreggRecognition-main/src/dataloader.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ from torch.utils.data import Dataset
5
+ from PIL import Image, ImageOps
6
+ from config import CONFIG
7
+
8
def rgb2grey(rgb):
    """Collapse an RGB(A) float image array to grayscale.

    Uses the ITU-R BT.601 luma weights over the first three channels;
    any alpha channel is ignored.
    """
    luma_weights = np.array([0.299, 0.587, 0.114])
    return rgb[..., :3] @ luma_weights
10
+
11
+ import os
12
+ import numpy as np
13
+ from config import CONFIG
14
+
15
def data_split():
    """Split the file names in the configured data folder into three lists.

    Returns:
        (train_files, val_files, test_files, max_H, max_W, max_seq_length)
        where the last three are padding dimensions used by the datasets.
    """
    config = CONFIG()
    folder = config.data_folder
    val_proportion = float(config.val_proportion)  # Convert to float (config may store a string)

    # NOTE(review): os.listdir order is OS-dependent, so this split is not
    # reproducible across runs/machines — consider sorting first.
    files = os.listdir(folder)
    period = int(np.round(1 / val_proportion))  # e.g. 0.1 -> every 10th file

    # Split logic here
    # NOTE(review): with period=10 each split gets only every 10th file,
    # so ~70% of the files end up in no split at all — confirm intended.
    train_files = files[::period]
    val_files = files[1::period]
    test_files = files[2::period]
    # NOTE(review): hard-coded placeholders, not measured from the data.
    max_H, max_W, max_seq_length = 256, 256, 100  # Example values

    return train_files, val_files, test_files, max_H, max_W, max_seq_length
30
+
31
def augmentation_simple(filename, aug_type, max_H, max_W, folder=CONFIG.data_folder):
    """Load a grayscale image and paste it at one of six offsets on a white canvas.

    aug_type % 2 selects the vertical offset, aug_type % 3 the horizontal one.
    Returns a (max_H, max_W) float array with the image placed on a canvas of ones.
    """
    # BUG FIX: the original called mpimg.imread, but matplotlib was never
    # imported anywhere in this module, so every call raised NameError.
    # Load via PIL (already imported here) and scale uint8 [0,255] to the
    # float [0,1] range that mpimg.imread produces for PNGs.
    with Image.open(os.path.join(folder, filename)) as img:
        image = rgb2grey(np.array(img.convert('RGB')) / 255)
    image_augmented = np.ones((max_H, max_W))
    h, w = np.shape(image)
    stride_0, stride_1 = max_H - h, (max_W - w) // 2
    offset = ((aug_type % 2) * stride_0, (aug_type % 3) * stride_1)
    image_augmented[offset[0]: h + offset[0], offset[1]: w + offset[1]] = image
    return image_augmented
39
+
40
def augmentation_nine(filename, aug_type, max_H, max_W, folder=CONFIG.data_folder):
    """Load an image and apply one of nine rotation/scale augmentation variants.

    aug_type in [0, 9): aug_type % 3 selects rotation (0: none, 1: +2 deg,
    2: -2 deg) and aug_type // 3 selects scaling (0: none, 1: x0.98, 2: x0.96).
    The grayscale result is pasted onto a white (all-ones) max_H x max_W canvas.
    """
    image_augmented = np.ones((max_H, max_W))  # white background canvas
    image = Image.open(os.path.join(folder, filename)).convert('RGB')
    w_ori, h_ori = image.size  # PIL .size is (width, height)
    rotate_ind = aug_type % 3
    scale_ind = aug_type // 3

    # Invert before rotating so rotate()'s black fill becomes white again
    # after the second invert.
    image = ImageOps.invert(image)
    if rotate_ind == 1:
        image = image.rotate(2, expand=True)
    elif rotate_ind == 2:
        image = image.rotate(-2, expand=True)
    image = ImageOps.invert(image)

    # NOTE(review): PIL .size returns (width, height), so 'h' here actually
    # holds the width. The pair is used consistently with resize((w, h)) below,
    # but the names are swapped — confirm before refactoring.
    h, w = image.size
    if scale_ind == 1:
        h, w = int(np.floor(h * 0.98)), int(np.floor(w * 0.98))
        image = image.resize((h, w))
    elif scale_ind == 2:
        h, w = int(np.floor(h * 0.96)), int(np.floor(w * 0.96))
        image = image.resize((h, w))

    image = rgb2grey(np.array(image) / 255)  # uint8 [0,255] -> float [0,1]
    h, w = np.shape(image)                   # now genuinely (rows, cols)
    stride_0, stride_1 = (max_H - 10 - h_ori) // 2, (max_W - 10 - w_ori) // 2
    # NOTE(review): both offsets use aug_type % 3 — the row offset likely
    # intended a different index (cf. augmentation_simple, which mixes
    # % 2 and % 3); confirm the intended placement grid.
    offset = ((aug_type % 3) * stride_0, (aug_type % 3) * stride_1)
    try:
        image_augmented[offset[0]: h + offset[0], offset[1]: w + offset[1]] = image
    except ValueError:
        # Placement fell outside the canvas — report the file and fall
        # through to return the blank canvas.
        print(filename)
    return image_augmented
71
+
72
class ShorthandGenerationDataset(Dataset):
    """Dataset of (image, context, next-character) instances for shorthand generation.

    Each file name minus its 4-char extension is the word label; it is wrapped
    as '+word#' ('+' start marker, '#' end marker) and every context prefix of
    length 1..len-1 becomes one training instance per augmentation variant,
    grouped into buckets by context length.
    """

    def __init__(self, file_list, max_H, max_W, max_label_leng, aug_types, channels=1):
        self.file_list = file_list
        self.H, self.W = max_H, max_W
        self.channels = channels
        self.vocabulary = 'abcdefghijklmnopqrstuvwxyz+#'  # '+' start, '#' end marker
        self.dict_c2i = {char: idx for idx, char in enumerate(self.vocabulary)}
        self.max_label_length = max_label_leng
        self.max_context_length = self.max_label_length - 1
        self.aug_types = aug_types
        # Buckets of [sequence, augmentation id, context length], keyed by length.
        self.instance_indices_by_length = {i: [] for i in range(1, self.max_context_length + 1)}

        for file in file_list:
            seq = '+' + file[:-4] + '#'  # strip 4-char extension, add markers
            max_context_len = len(seq) - 1
            for length in range(1, max_context_len + 1):
                for aug in range(self.aug_types):
                    self.instance_indices_by_length[length].append([seq, aug, length])

        # BUG FIX: the original summed range(1, max_context_length), which
        # excludes the final bucket, so every instance with the maximum
        # context length was unreachable through __len__/__getitem__.
        self.total_size = sum(
            len(self.instance_indices_by_length[i])
            for i in range(1, self.max_context_length + 1)
        )

    def __len__(self):
        """Total number of instances across all context-length buckets."""
        return self.total_size

    def __getitem__(self, idx):
        """Map flat index idx to its bucket, then build the instance tensors."""
        # Smallest context_length whose cumulative bucket size exceeds idx.
        context_length = 1
        while sum(len(self.instance_indices_by_length[length]) for length in range(1, context_length + 1)) <= idx:
            context_length += 1

        # Offset of idx within its bucket.
        starting_index = idx - sum(len(self.instance_indices_by_length[length]) for length in range(1, context_length))

        seq, augmentation_type, instance_context_length = self.instance_indices_by_length[context_length][starting_index]

        file_name = seq[1:-1] + '.png'  # undo the '+'/'#' wrapping
        img = augmentation_nine(file_name, augmentation_type, self.H, self.W)
        img = np.expand_dims(img, axis=0)  # add channel dimension -> (1, H, W)

        x_context = np.array([self.dict_c2i[char] for char in seq[:instance_context_length]])
        y = self.dict_c2i[seq[instance_context_length]]

        return torch.tensor(img, dtype=torch.float32), torch.tensor(x_context, dtype=torch.long), torch.tensor(y, dtype=torch.long)
GreggRecognition-main/src/env_tests.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Environment sanity check: report whether PyTorch can reach a CUDA device.
import torch

cuda_ok = torch.cuda.is_available()
print("CUDA available:", cuda_ok)
GreggRecognition-main/src/main.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.utils.data import DataLoader
3
+ from torch.nn.utils.rnn import pad_sequence
4
+ from model import Model
5
+ from dataloader import ShorthandGenerationDataset, data_split
6
+ from config import CONFIG
7
+ from tqdm import tqdm # Import tqdm for progress bar
8
+
9
def collate_fn(batch):
    """Collate (image, sequence, scalar) triples into padded batch tensors.

    The middle element varies in length across the batch, so it is
    zero-padded to the longest one; the first is padded the same way
    (a no-op when all images share one shape) and the last is stacked.
    """
    images, contexts, targets = zip(*batch)
    padded_images = pad_sequence(images, batch_first=True, padding_value=0)
    padded_contexts = pad_sequence(contexts, batch_first=True, padding_value=0)
    stacked_targets = torch.stack(targets)
    return padded_images, padded_contexts, stacked_targets
16
+
17
# --- Data preparation --------------------------------------------------
# data_split() also returns the padded image size (max_H, max_W) and the
# maximum label length used to size the datasets below.
train_files, val_files, test_files, max_H, max_W, max_seq_length = data_split()

# Initialize dataset and dataloaders.
# Training uses 9 augmentation variants per word; val/test use a single one.
train_dataset = ShorthandGenerationDataset(train_files, max_H, max_W, aug_types=9, max_label_leng=max_seq_length, channels=1)
val_dataset = ShorthandGenerationDataset(val_files, max_H, max_W, aug_types=1, max_label_leng=max_seq_length, channels=1)
test_dataset = ShorthandGenerationDataset(test_files, max_H, max_W, aug_types=1, max_label_leng=max_seq_length, channels=1)

# collate_fn pads the variable-length context sequences within each batch.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Initialize model sized to the padded image dimensions.
config = CONFIG()
model = Model(max_H, max_W, config)

# Example training loop.
# NOTE(review): the per-batch bodies are placeholders (`pass`) — no loss,
# optimizer, backward pass, or metric is implemented yet.
# NOTE(review): the loop variables are named (imgs, labels, additional), but
# the dataset yields (image, context tokens, next-char target) — confirm.
num_epochs = 10  # Define the number of epochs
for epoch in range(num_epochs):
    model.train()
    for imgs, labels, additional in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
        # Training step
        pass

    model.eval()
    with torch.no_grad():
        for imgs, labels, additional in tqdm(val_loader, desc=f"Validation Epoch {epoch+1}/{num_epochs}"):
            # Validation step
            pass
GreggRecognition-main/src/model.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
class FeatureExtractor(nn.Module):
    """VGG-style CNN backbone for the shorthand images.

    Four conv-conv stages (32, 64, 128, 256 channels) with 2x2 max-pooling
    after the first three stages, so spatial dimensions shrink by a factor
    of 8 overall. Input is a single-channel image; output has 256 channels.
    """

    # (in_channels, out_channels) for conv1 .. conv8, in creation order.
    _CHANNELS = [(1, 32), (32, 32), (32, 64), (64, 64),
                 (64, 128), (128, 128), (128, 256), (256, 256)]

    def __init__(self):
        super(FeatureExtractor, self).__init__()
        # Register conv{i}/bn{i} pairs under the same attribute names as the
        # hand-written version, keeping state-dict keys and parameter
        # initialization order unchanged.
        for i, (c_in, c_out) in enumerate(self._CHANNELS, start=1):
            setattr(self, f'conv{i}', nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
            setattr(self, f'bn{i}', nn.BatchNorm2d(c_out))
        self.pool = nn.MaxPool2d(kernel_size=2)

    def forward(self, x):
        """Apply conv->bn->relu eight times, pooling after pairs 2, 4, and 6."""
        for i in range(1, 9):
            conv = getattr(self, f'conv{i}')
            bn = getattr(self, f'bn{i}')
            x = F.relu(bn(conv(x)))
            if i in (2, 4, 6):
                x = self.pool(x)
        return x
45
+
46
class Model(nn.Module):
    """Image-conditioned character model: CNN features seed a GRU over the context.

    The image is encoded by FeatureExtractor, projected to RNN_size, and used
    as the GRU's initial hidden state; the character context is embedded and
    fed through the GRU, producing a per-step vocabulary distribution.
    """

    def __init__(self, H, W, config):
        """H, W: padded input image height/width (must be divisible by 8 for
        the Linear sizing below). config supplies vocabulary_size,
        embedding_size, RNN_size, and drop_out."""
        super(Model, self).__init__()
        self.feature_extractor = FeatureExtractor()
        self.flatten = nn.Flatten()
        # FeatureExtractor pools three times, so the feature map is
        # 256 x (H//8) x (W//8).
        self.fc = nn.Linear(256 * (H // 8) * (W // 8), config.RNN_size)
        self.embedding = nn.Embedding(config.vocabulary_size, config.embedding_size)
        self.gru = nn.GRU(config.embedding_size, config.RNN_size, batch_first=True)
        self.dropout = nn.Dropout(config.drop_out)
        self.fc_out = nn.Linear(config.RNN_size, config.vocabulary_size)

    def forward(self, img, x_context):
        """img: (batch, 1, H, W) float image; x_context: (batch, seq) int token ids.
        Returns (batch, seq, vocabulary_size) softmax probabilities."""
        img_f = self.feature_extractor(img)
        img_f = self.flatten(img_f)
        img_f = F.relu(self.fc(img_f))

        x_seq_embedding = self.embedding(x_context)
        # Image features become the GRU's initial hidden state (1, batch, RNN_size).
        h_t, _ = self.gru(x_seq_embedding, img_f.unsqueeze(0))
        h_t_dropped = self.dropout(h_t)
        # NOTE(review): returns softmax probabilities; if training uses
        # nn.CrossEntropyLoss this should return raw logits — confirm the
        # loss function before relying on these as probabilities.
        predictions = F.softmax(self.fc_out(h_t_dropped), dim=-1)
        return predictions
GreggRecognition-main/src/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ numpy
4
+ pandas
5
+ scikit-learn
6
+ matplotlib
7
+ tqdm
+ Pillow
GreggRecognition-main/src/utils/datafixerupper.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Takes gregg-1916 dataset with labels in filenames and converts to CSV-labeled data
3
+ #
4
+
5
+ import os
6
+ import csv
7
+ from pathlib import Path
8
+
9
# Default source (images whose filenames encode the label) and destination
# (CSV output) folders, relative to this utils/ directory.
# NOTE(review): assumed repo layout — confirm these paths when running.
data_folder = '../data'
output_folder = '../data-labeled'
11
+
12
def generate_csv_from_data_folder(data_folder, output_folder, csv_filename='labels.csv'):
    """Write a CSV mapping each file in *data_folder* to its label.

    The label is the file's stem (name minus extension), since the
    gregg-1916 dataset encodes the word label in the filename. The CSV is
    written to *output_folder* (created if missing) under *csv_filename*.
    """
    src_dir = Path(data_folder)
    dst_dir = Path(output_folder)
    dst_dir.mkdir(parents=True, exist_ok=True)

    csv_path = dst_dir / csv_filename

    with open(csv_path, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['filename', 'label'])
        for entry in src_dir.iterdir():
            if not entry.is_file():
                continue
            # The stem (filename without extension) is the word label.
            writer.writerow([entry.name, entry.stem])

    print(f"CSV file generated at: {csv_path}")
29
+
30
+ generate_csv_from_data_folder(data_folder, output_folder)