| import os |
| import pickle |
| import random |
| import shutil |
|
|
| import cv2 |
| import matplotlib.pyplot as plt |
| import numpy as np |
|
|
| from data.dataset import get_transform |
|
|
|
|
def summarize_dataset(data: dict):
    """Print the number of authors and images in the train and test splits.

    Args:
        data: Mapping with ``'train'`` and ``'test'`` entries. Each split maps
            an author id to a sized collection holding that author's samples.
    """
    train, test = data['train'], data['test']
    print(f"Training authors: {len(train)} \t Testing authors: {len(test)}")

    # Sum over the per-author collections directly; the author ids are not needed.
    training_images = sum(len(samples) for samples in train.values())
    testing_images = sum(len(samples) for samples in test.values())
    print(f"Training images: {training_images} \t Testing images: {testing_images}")
|
|
|
|
def compare_data(path_a: str, path_b: str):
    """Visually compare the training images of random authors in two datasets.

    Both pickles are loaded and summarized. A random author is then drawn
    from the first dataset repeatedly; when that author also exists in the
    second dataset, the first ten images from each dataset are shown in two
    OpenCV windows. Press any key for another author, or ``q`` / ``Esc`` to
    stop.

    Args:
        path_a: Path to the first pickled dataset.
        path_b: Path to the second pickled dataset.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only open
    # dataset files that come from a trusted source.
    with open(path_a, 'rb') as f:
        data_a = pickle.load(f)
    summarize_dataset(data_a)

    with open(path_b, 'rb') as f:
        data_b = pickle.load(f)
    summarize_dataset(data_b)

    # Author ids are normalised to int so that e.g. '013' and 13 match.
    training_a = {int(k): v for k, v in data_a['train'].items()}
    training_b = {int(k): v for k, v in data_b['train'].items()}

    # Without a shared author the sampling loop below could never show anything.
    if not training_a.keys() & training_b.keys():
        print("No common authors between the two datasets")
        return

    authors_a = list(training_a)
    quit_keys = (27, ord('q'))  # Esc or 'q'

    try:
        while True:
            author = random.choice(authors_a)

            if author not in training_b:
                print(f"Author: {author} not found in second dataset")
                continue

            author_images_a = [np.array(im_dict["img"]) for im_dict in training_a[author]]
            author_images_b = [np.array(im_dict["img"]) for im_dict in training_b[author]]

            # Place the first ten samples side by side in one strip per dataset.
            vis_a = np.hstack(author_images_a[:10])
            vis_b = np.hstack(author_images_b[:10])

            cv2.imshow("Author a", vis_a)
            cv2.imshow("Author b", vis_b)

            # Mask to the low byte: some platforms set higher bits in the key code.
            if cv2.waitKey(0) & 0xFF in quit_keys:
                break
    finally:
        cv2.destroyAllWindows()
|
|
|
|
def show_dataset(path: str, samples: int = 10, preview_author: str = '013'):
    """Step through the training images of a pickled dataset in OpenCV windows.

    First, every image of ``preview_author`` is shown one at a time. Then, for
    each author in the training split, the first ``samples`` images are shown
    side by side in a single window. Each view waits for a key press.

    Args:
        path: Path to the pickled dataset.
        samples: Maximum number of images per author in the overview strip.
        preview_author: Author id whose images are shown individually first.
            The preview is skipped when the id is ``None`` or is not present
            in the training split.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only open
    # dataset files that come from a trusted source.
    with open(path, 'rb') as f:
        data = pickle.load(f)
    summarize_dataset(data)

    training = data['train']

    if preview_author is not None and preview_author in training:
        for im_dict in training[preview_author]:
            cv2.imshow('image', np.array(im_dict["img"]).astype(np.uint8))
            cv2.waitKey(0)

    for author, author_samples in training.items():
        author_images = [np.array(im_dict["img"]).astype(np.uint8) for im_dict in author_samples]

        # np.hstack raises on an empty sequence, so skip authors without images.
        if not author_images[:samples]:
            print(f"Author: {author} has no images to show")
            continue

        vis = np.hstack(author_images[:samples])
        print(f"Author: {author}")
        # Close the previous author's window(s) before showing the next strip.
        cv2.destroyAllWindows()
        cv2.imshow("vis", vis)
        cv2.waitKey(0)
|
|
|
|
def test_transform(path: str):
    """Visually check that the dataset transform can be inverted losslessly.

    For every training image the grayscale transform from ``get_transform`` is
    applied, the result is mapped back to 8-bit pixel values, and the original,
    the restored image and a mask of differing pixels are displayed together
    with histograms of both images.

    Args:
        path: Path to the pickled dataset.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only open
    # dataset files that come from a trusted source.
    with open(path, 'rb') as f:
        data = pickle.load(f)
    summarize_dataset(data)

    training = data['train']
    transform = get_transform(grayscale=True)

    for author in training.values():
        for image_dict in author:
            original_image = image_dict['img'].convert('L')
            transformed_image = transform(original_image).detach().numpy()

            # Inverse mapping assumes the transform outputs values in [-1, 1]
            # -- TODO confirm against get_transform.
            restored = ((transformed_image + 1) / 2) * 255
            # Round before casting: astype(np.uint8) truncates, so float
            # round-off such as 127.99999 would otherwise become 127 and be
            # reported as a wrong pixel. Clip so the cast cannot wrap around.
            restored_image = np.clip(np.rint(restored), 0, 255).astype(np.uint8)
            restored_image = np.squeeze(restored_image)
            original_image = np.array(original_image)

            # White where the round trip changed the pixel value.
            wrong_pixels = (original_image != restored_image).astype(np.uint8) * 255

            combined = np.hstack((restored_image, original_image, wrong_pixels))

            cv2.imshow("original", original_image)
            cv2.imshow("restored", restored_image)
            cv2.imshow("combined", combined)

            fig, ax = plt.subplots(1, 2)
            ax[0].hist(original_image.flatten())
            ax[1].hist(restored_image.flatten())
            plt.show()
            # Release the figure so that figures do not pile up across images.
            plt.close(fig)

            cv2.waitKey(0)
|
|
def dump_words(data_path: str = "../files/IAM-32.pickle", p: str = '.', p_mark: str = 'point'):
    """Save every training image whose label contains a given substring.

    Matching images are written as ``<index>.png`` into
    ``../saved_images/debug/<p_mark>``. That folder is deleted and recreated
    on every call.

    Args:
        data_path: Path to the pickled dataset.
        p: Substring searched for in each sample's label.
        p_mark: Name of the output sub-folder (a filesystem-safe name for ``p``).
    """
    # NOTE(security): pickle.load can execute arbitrary code; only open
    # dataset files that come from a trusted source.
    with open(data_path, 'rb') as f:
        data = pickle.load(f)

    training = data['train']

    target_folder = f"../saved_images/debug/{p_mark}"

    # Start from an empty folder so images from earlier runs do not linger.
    if os.path.exists(target_folder):
        shutil.rmtree(target_folder)

    # makedirs also creates missing parent folders, unlike os.mkdir.
    os.makedirs(target_folder)

    count = 0

    for author_samples in training.values():
        for im_dict in author_samples:
            if p not in str(im_dict["label"]):
                continue

            # Convert only the images that are actually written.
            img = np.array(im_dict["img"]).astype(np.uint8)
            out_path = os.path.join(target_folder, f"{count}.png")
            # cv2.imwrite reports failure through its return value, not an exception.
            if not cv2.imwrite(out_path, img):
                print(f"Could not write {out_path}")
            count += 1
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the transform round-trip check on the IAM-32 dataset.
    dataset_path = "../files/IAM-32.pickle"
    test_transform(dataset_path)
| |
| |
|
|