Thisissophia committed on
Commit 69e2ef2 · verified · 1 Parent(s): b5e8944

Upload 87 files

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +5 -0
  2. __pycache__/creat_anaglyph.cpython-38.pyc +0 -0
  3. __pycache__/deeplab_demo.cpython-38.pyc +0 -0
  4. __pycache__/mypath.cpython-38.pyc +0 -0
  5. anaglyph.png +3 -0
  6. app.py +96 -0
  7. creat_anaglyph.py +149 -0
  8. dataloaders/__init__.py +56 -0
  9. dataloaders/__pycache__/__init__.cpython-310.pyc +0 -0
  10. dataloaders/__pycache__/__init__.cpython-38.pyc +0 -0
  11. dataloaders/__pycache__/custom_transforms.cpython-310.pyc +0 -0
  12. dataloaders/__pycache__/custom_transforms.cpython-38.pyc +0 -0
  13. dataloaders/__pycache__/utils.cpython-310.pyc +0 -0
  14. dataloaders/__pycache__/utils.cpython-38.pyc +0 -0
  15. dataloaders/custom_transforms.py +165 -0
  16. dataloaders/datasets/__init__.py +0 -0
  17. dataloaders/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
  18. dataloaders/datasets/__pycache__/__init__.cpython-38.pyc +0 -0
  19. dataloaders/datasets/__pycache__/cityscapes.cpython-310.pyc +0 -0
  20. dataloaders/datasets/__pycache__/cityscapes.cpython-38.pyc +0 -0
  21. dataloaders/datasets/__pycache__/coco.cpython-310.pyc +0 -0
  22. dataloaders/datasets/__pycache__/coco.cpython-38.pyc +0 -0
  23. dataloaders/datasets/__pycache__/combine_dbs.cpython-310.pyc +0 -0
  24. dataloaders/datasets/__pycache__/combine_dbs.cpython-38.pyc +0 -0
  25. dataloaders/datasets/__pycache__/invoice.cpython-310.pyc +0 -0
  26. dataloaders/datasets/__pycache__/invoice.cpython-38.pyc +0 -0
  27. dataloaders/datasets/__pycache__/pascal.cpython-310.pyc +0 -0
  28. dataloaders/datasets/__pycache__/pascal.cpython-38.pyc +0 -0
  29. dataloaders/datasets/__pycache__/sbd.cpython-310.pyc +0 -0
  30. dataloaders/datasets/__pycache__/sbd.cpython-38.pyc +0 -0
  31. dataloaders/datasets/cityscapes.py +146 -0
  32. dataloaders/datasets/coco.py +160 -0
  33. dataloaders/datasets/combine_dbs.py +100 -0
  34. dataloaders/datasets/invoice.py +145 -0
  35. dataloaders/datasets/pascal.py +145 -0
  36. dataloaders/datasets/sbd.py +129 -0
  37. dataloaders/utils.py +111 -0
  38. deeplab-mobilenet.pth.tar +3 -0
  39. deeplab-resnet.pth.tar +3 -0
  40. deeplab_demo.py +111 -0
  41. end.py +90 -0
  42. img/mask.png +0 -0
  43. img/masked.png +0 -0
  44. img/people.jpg +0 -0
  45. img/scenery.jpg +3 -0
  46. img/scenery2.jpg +3 -0
  47. modeling/__init__.py +0 -0
  48. modeling/__pycache__/__init__.cpython-310.pyc +0 -0
  49. modeling/__pycache__/__init__.cpython-38.pyc +0 -0
  50. modeling/__pycache__/aspp.cpython-310.pyc +0 -0
.gitattributes ADDED
@@ -0,0 +1,5 @@
+ anaglyph.png filter=lfs diff=lfs merge=lfs -text
+ deeplab-mobilenet.pth.tar filter=lfs diff=lfs merge=lfs -text
+ deeplab-resnet.pth.tar filter=lfs diff=lfs merge=lfs -text
+ img/scenery.jpg filter=lfs diff=lfs merge=lfs -text
+ img/scenery2.jpg filter=lfs diff=lfs merge=lfs -text
__pycache__/creat_anaglyph.cpython-38.pyc ADDED
Binary file (2.53 kB).
 
__pycache__/deeplab_demo.cpython-38.pyc ADDED
Binary file (3.46 kB).
 
__pycache__/mypath.cpython-38.pyc ADDED
Binary file (812 Bytes).
 
anaglyph.png ADDED

Git LFS Details

  • SHA256: fbbb5fd4ee33896d6cf1cf8c245f420778673c2323a2cd1203a490a79e2d63be
  • Pointer size: 133 Bytes
  • Size of remote file: 11.6 MB
app.py ADDED
@@ -0,0 +1,96 @@
+ # Hugging Face requirements for app.py, the main file for running the application.
+ # Equivalent to end.py, to be used with the Hugging Face inference API; Hugging Face
+ # recognizes app.py as the main file for running the application.
+ # app.py
+
+ import sys
+ import os
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+ import gradio as gr
+ from PIL import Image
+ from deeplab_demo import get_people
+ from creat_anaglyph import insert_person_to_stereo_gradio
+ import torch
+ from torchvision.transforms import ToPILImage
+
+
+ # Process the person image: segment the person and return the masked image and the prediction grid
+ def process_person_image(person_image):
+     masked_image_pil, grid_image = get_people(person_image)
+
+     if isinstance(masked_image_pil, torch.Tensor):
+         masked_image_pil = ToPILImage()(masked_image_pil)
+     if isinstance(grid_image, torch.Tensor):
+         grid_image = ToPILImage()(grid_image)
+
+     return masked_image_pil, grid_image
+
+ # Generate the anaglyph image
+ def generate_anaglyph(masked_image_pil, scenery_image, depth_option, custom_disparity):
+     # Default disparities for the non-custom options: close, medium, far
+     depth_disparities = {
+         "close": 10,  # adjust values as needed
+         "medium": 5,
+         "far": 2
+     }
+
+     # Use custom_disparity only if depth_option is "custom"
+     disparity = custom_disparity if depth_option == "custom" else depth_disparities.get(depth_option, 5)
+
+     # Ensure inputs are PIL images
+     if isinstance(masked_image_pil, torch.Tensor):
+         masked_image_pil = ToPILImage()(masked_image_pil)
+     if isinstance(scenery_image, torch.Tensor):
+         scenery_image = ToPILImage()(scenery_image)
+
+     anaglyph_image = insert_person_to_stereo_gradio(scenery_image, masked_image_pil, disparity)
+
+     if isinstance(anaglyph_image, torch.Tensor):
+         anaglyph_image = ToPILImage()(anaglyph_image)
+
+     return anaglyph_image
+
+ # Create the Gradio interface
+ with gr.Blocks() as iface:
+     with gr.Row():
+         person_image_input = gr.Image(type="pil", label="Character image")
+         scenery_image_input = gr.Image(type="pil", label="Landscape image")
+         depth_option_input = gr.Dropdown(choices=["close", "medium", "far", "custom"], label="Depth Options")
+         custom_disparity_input = gr.Slider(minimum=0, maximum=50, step=1, label="Custom Depth Disparity", visible=False)
+
+     with gr.Row():
+         grid_image_output = gr.Image(type="pil", label="Grid", interactive=False)
+         masked_image_output = gr.Image(type="pil", label="Masked", interactive=False)
+         anaglyph_image_output = gr.Image(type="pil", label="Anaglyph", interactive=False)
+
+     # Button 1: process the character image
+     process_button = gr.Button("Process person image")
+     process_button.click(
+         fn=process_person_image,
+         inputs=person_image_input,
+         outputs=[masked_image_output, grid_image_output]
+     )
+
+     # Show the custom disparity slider only when the "custom" depth option is selected
+     def update_custom_slider_visibility(depth_option):
+         return gr.update(visible=(depth_option == "custom"))
+
+     depth_option_input.change(
+         fn=update_custom_slider_visibility,
+         inputs=[depth_option_input],
+         outputs=custom_disparity_input
+     )
+
+     # Button 2: generate the anaglyph image
+     generate_button = gr.Button("Generate Anaglyph Image")
+     generate_button.click(
+         fn=generate_anaglyph,
+         inputs=[masked_image_output, scenery_image_input, depth_option_input, custom_disparity_input],
+         outputs=anaglyph_image_output
+     )
+
+ # Launch the Gradio interface
+ iface.launch()
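
For reference, the same pipeline can be driven without the Gradio UI. A minimal sketch (not part of the commit), assuming the repository modules and the deeplab-mobilenet.pth.tar checkpoint are available locally and the script is run with no extra CLI arguments (get_people builds its own argparse parser):

    from PIL import Image
    from deeplab_demo import get_people
    from creat_anaglyph import insert_person_to_stereo_gradio

    person = Image.open('img/people.jpg').convert('RGB')
    scenery = Image.open('img/scenery.jpg').convert('RGB')

    masked, grid = get_people(person)  # segment the person out of the photo
    # disparity=5 matches the "medium" depth option in app.py
    anaglyph = insert_person_to_stereo_gradio(scenery, masked, 5)
    anaglyph.save('anaglyph.png')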
creat_anaglyph.py ADDED
@@ -0,0 +1,149 @@
+ # file: creat_anaglyph.py
+ # Description: creates a red-cyan anaglyph stereo image by inserting a person into a stereo image pair.
+
+ from PIL import Image
+ import numpy as np
+ import torch
+ from torchvision.transforms import ToPILImage
+
+ # Preprocess the person image to make its black background transparent
+ def preprocess_person_image(person_image_path):
+     # load the uploaded person image
+     person_image = Image.open(person_image_path).convert('RGBA')
+     data = np.array(person_image)
+
+     # separate color channels
+     r, g, b, a = data.T
+
+     # threshold for detecting the black background
+     black_threshold = 1
+     black_areas = (r < black_threshold) & (g < black_threshold) & (b < black_threshold)
+
+     # set black background pixels to transparent (only the alpha channel is modified)
+     data[..., 3][black_areas.T] = 0
+
+     # create a new image
+     transparent_image = Image.fromarray(data)
+     return transparent_image
+
+
+ # Gradio-compatible version of preprocess_person_image (takes a PIL image instead of a path)
+ def preprocess_person_image_gradio(person_image):
+     # ensure the image is in RGBA mode
+     if person_image.mode != 'RGBA':
+         person_image = person_image.convert('RGBA')
+
+     data = np.array(person_image)
+
+     # separate color channels
+     r, g, b, a = data.T
+
+     # threshold for detecting the black background
+     black_threshold = 1
+     black_areas = (r < black_threshold) & (g < black_threshold) & (b < black_threshold)
+
+     # set black background pixels to transparent (only the alpha channel is modified)
+     data[..., 3][black_areas.T] = 0
+
+     # create a new image
+     transparent_image = Image.fromarray(data)
+     return transparent_image
+
+ def insert_person_to_stereo(stereo_image_path, person_image_path, depth_option):
+     # load the stitched stereo image
+     stereo_image = Image.open(stereo_image_path).convert('RGB')
+     width, height = stereo_image.size
+
+     # assume the stitched image is symmetrical: left half and right half
+     left_image = stereo_image.crop((0, 0, width // 2, height))
+     right_image = stereo_image.crop((width // 2, 0, width, height))
+
+     # preprocess the person image
+     person_image = preprocess_person_image(person_image_path)
+     person_width, person_height = person_image.size
+
+     # define disparity options based on image width
+     max_disparity = width // 20
+     disparity_options = {
+         'close': max_disparity // 5,
+         'medium': max_disparity // 15,
+         'far': max_disparity // 20
+     }
+
+     # get the corresponding disparity value
+     disparity = disparity_options.get(depth_option, max_disparity // 2)
+
+     # insertion position: align the bottom of the person with the bottom of the scene, centered horizontally
+     x_position = (width // 4) - (person_width // 2) + disparity
+     y_position = height - person_height
+
+     # paste the person image into the left and right views
+     left_image.paste(person_image, (x_position, y_position), person_image)
+     right_image.paste(person_image, (x_position - disparity, y_position), person_image)
+
+     # combine the left and right views into a red-cyan stereo image
+     left_array = np.array(left_image)
+     right_array = np.array(right_image)
+
+     # red channel from the left image, green and blue channels from the right image
+     anaglyph = np.zeros_like(left_array)
+     anaglyph[..., 0] = left_array[..., 0]
+     anaglyph[..., 1] = right_array[..., 1]
+     anaglyph[..., 2] = right_array[..., 2]
+
+     # convert to an image and save
+     anaglyph_image = Image.fromarray(anaglyph)
+     anaglyph_image.save('anaglyph.png')
+
+
+ # Gradio-compatible version of insert_person_to_stereo (takes PIL images and a numeric disparity)
+ def insert_person_to_stereo_gradio(stereo_image, person_image, disparity):
+     # ensure the person image is RGBA and the stereo image is RGB
+     if person_image.mode != 'RGBA':
+         person_image = person_image.convert('RGBA')
+     if stereo_image.mode != 'RGB':
+         stereo_image = stereo_image.convert('RGB')
+     width, height = stereo_image.size
+
+     # assume the stitched image is symmetrical: left half and right half
+     left_image = stereo_image.crop((0, 0, width // 2, height))
+     right_image = stereo_image.crop((width // 2, 0, width, height))
+
+     # preprocess the person image
+     person_image = preprocess_person_image_gradio(person_image)
+     person_width, person_height = person_image.size
+
+     # insertion position: align the bottom of the person with the bottom of the scene, centered horizontally
+     x_position = (width // 4) - (person_width // 2) + disparity
+     y_position = height - person_height
+
+     # paste the person image into the left and right views
+     left_image.paste(person_image, (x_position, y_position), person_image)
+     right_image.paste(person_image, (x_position - disparity, y_position), person_image)
+
+     # combine the left and right views into a red-cyan stereo image
+     left_array = np.array(left_image)
+     right_array = np.array(right_image)
+
+     # red channel from the left image, green and blue channels from the right image
+     anaglyph = np.zeros_like(left_array)
+     anaglyph[..., 0] = left_array[..., 0]
+     anaglyph[..., 1] = right_array[..., 1]
+     anaglyph[..., 2] = right_array[..., 2]
+
+     # convert to an image and return
+     anaglyph_image = Image.fromarray(anaglyph)
+     return anaglyph_image
+
+
+ # Example (guarded so it does not run as a side effect when this module is imported by app.py)
+ if __name__ == '__main__':
+     insert_person_to_stereo('img/scenery.jpg', 'img/masked.png', 'far')
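
The red-cyan recipe used by both functions (red from the left view, green and blue from the right view) is easy to check in isolation. A small sketch on synthetic arrays, not part of the commit:

    import numpy as np

    left = np.random.randint(0, 256, (4, 4, 3), dtype=np.uint8)
    right = np.random.randint(0, 256, (4, 4, 3), dtype=np.uint8)

    anaglyph = np.zeros_like(left)
    anaglyph[..., 0] = left[..., 0]   # red channel from the left eye's view
    anaglyph[..., 1] = right[..., 1]  # green channel from the right eye's view
    anaglyph[..., 2] = right[..., 2]  # blue channel from the right eye's view

    assert np.array_equal(anaglyph[..., 0], left[..., 0])
    assert np.array_equal(anaglyph[..., 1:], right[..., 1:])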
dataloaders/__init__.py ADDED
@@ -0,0 +1,56 @@
+ from dataloaders.datasets import cityscapes, coco, combine_dbs, pascal, sbd, invoice
+ from torch.utils.data import DataLoader
+
+ def make_data_loader(args, **kwargs):
+
+     if args.dataset == 'invoice':
+         train_set = invoice.VOCSegmentation(args, split='train')
+         val_set = invoice.VOCSegmentation(args, split='val')
+         if args.use_sbd:
+             sbd_train = sbd.SBDSegmentation(args, split=['train', 'val'])
+             train_set = combine_dbs.CombineDBs([train_set, sbd_train], excluded=[val_set])
+
+         num_class = train_set.NUM_CLASSES
+         train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, **kwargs)
+         val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False, **kwargs)
+         test_loader = None
+
+         return train_loader, val_loader, test_loader, num_class
+
+     elif args.dataset == 'pascal':
+         train_set = pascal.VOCSegmentation(args, split='train')
+         val_set = pascal.VOCSegmentation(args, split='val')
+         if args.use_sbd:
+             sbd_train = sbd.SBDSegmentation(args, split=['train', 'val'])
+             train_set = combine_dbs.CombineDBs([train_set, sbd_train], excluded=[val_set])
+
+         num_class = train_set.NUM_CLASSES
+         train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, **kwargs)
+         val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False, **kwargs)
+         test_loader = None
+
+         return train_loader, val_loader, test_loader, num_class
+
+     elif args.dataset == 'cityscapes':
+         train_set = cityscapes.CityscapesSegmentation(args, split='train')
+         val_set = cityscapes.CityscapesSegmentation(args, split='val')
+         test_set = cityscapes.CityscapesSegmentation(args, split='test')
+         num_class = train_set.NUM_CLASSES
+         train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, **kwargs)
+         val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False, **kwargs)
+         test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False, **kwargs)
+
+         return train_loader, val_loader, test_loader, num_class
+
+     elif args.dataset == 'coco':
+         train_set = coco.COCOSegmentation(args, split='train')
+         val_set = coco.COCOSegmentation(args, split='val')
+         num_class = train_set.NUM_CLASSES
+         train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, **kwargs)
+         val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False, **kwargs)
+         test_loader = None
+         return train_loader, val_loader, test_loader, num_class
+
+     else:
+         raise NotImplementedError
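
A minimal sketch of how make_data_loader is typically called (not part of the commit). The attribute names match what the branches above read off args, and it assumes the dataset directory configured in mypath.Path exists:

    from argparse import Namespace
    from dataloaders import make_data_loader

    args = Namespace(dataset='invoice', use_sbd=False,
                     base_size=512, crop_size=512, batch_size=4)
    train_loader, val_loader, test_loader, num_class = make_data_loader(args, num_workers=2)
    print(num_class)  # 2 for the invoice dataset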
dataloaders/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.46 kB).
 
dataloaders/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (1.45 kB).
 
dataloaders/__pycache__/custom_transforms.cpython-310.pyc ADDED
Binary file (5.23 kB).
 
dataloaders/__pycache__/custom_transforms.cpython-38.pyc ADDED
Binary file (5.32 kB).
 
dataloaders/__pycache__/utils.cpython-310.pyc ADDED
Binary file (3.85 kB).
 
dataloaders/__pycache__/utils.cpython-38.pyc ADDED
Binary file (3.42 kB).
 
dataloaders/custom_transforms.py ADDED
@@ -0,0 +1,165 @@
+ import torch
+ import random
+ import numpy as np
+
+ from PIL import Image, ImageOps, ImageFilter
+
+ class Normalize(object):
+     """Normalize a tensor image with mean and standard deviation.
+     Args:
+         mean (tuple): means for each channel.
+         std (tuple): standard deviations for each channel.
+     """
+     def __init__(self, mean=(0., 0., 0.), std=(1., 1., 1.)):
+         self.mean = mean
+         self.std = std
+
+     def __call__(self, sample):
+         img = sample['image']
+         mask = sample['label']
+         img = np.array(img).astype(np.float32)
+         mask = np.array(mask).astype(np.float32)
+         img /= 255.0
+         img -= self.mean
+         img /= self.std
+
+         return {'image': img,
+                 'label': mask}
+
+
+ class ToTensor(object):
+     """Convert ndarrays in sample to Tensors."""
+
+     def __call__(self, sample):
+         # swap color axis because
+         # numpy image: H x W x C
+         # torch image: C x H x W
+         img = sample['image']
+         mask = sample['label']
+         img = np.array(img).astype(np.float32).transpose((2, 0, 1))
+         mask = np.array(mask).astype(np.float32)
+
+         img = torch.from_numpy(img).float()
+         mask = torch.from_numpy(mask).float()
+
+         return {'image': img,
+                 'label': mask}
+
+
+ class RandomHorizontalFlip(object):
+     def __call__(self, sample):
+         img = sample['image']
+         mask = sample['label']
+         if random.random() < 0.5:
+             img = img.transpose(Image.FLIP_LEFT_RIGHT)
+             mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
+
+         return {'image': img,
+                 'label': mask}
+
+
+ class RandomRotate(object):
+     def __init__(self, degree):
+         self.degree = degree
+
+     def __call__(self, sample):
+         img = sample['image']
+         mask = sample['label']
+         rotate_degree = random.uniform(-1 * self.degree, self.degree)
+         img = img.rotate(rotate_degree, Image.BILINEAR)
+         mask = mask.rotate(rotate_degree, Image.NEAREST)
+
+         return {'image': img,
+                 'label': mask}
+
+
+ class RandomGaussianBlur(object):
+     def __call__(self, sample):
+         img = sample['image']
+         mask = sample['label']
+         if random.random() < 0.5:
+             img = img.filter(ImageFilter.GaussianBlur(
+                 radius=random.random()))
+
+         return {'image': img,
+                 'label': mask}
+
+
+ class RandomScaleCrop(object):
+     def __init__(self, base_size, crop_size, fill=0):
+         self.base_size = base_size
+         self.crop_size = crop_size
+         self.fill = fill
+
+     def __call__(self, sample):
+         img = sample['image']
+         mask = sample['label']
+         # random scale (short edge)
+         short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0))
+         w, h = img.size
+         if h > w:
+             ow = short_size
+             oh = int(1.0 * h * ow / w)
+         else:
+             oh = short_size
+             ow = int(1.0 * w * oh / h)
+         img = img.resize((ow, oh), Image.BILINEAR)
+         mask = mask.resize((ow, oh), Image.NEAREST)
+         # pad crop
+         if short_size < self.crop_size:
+             padh = self.crop_size - oh if oh < self.crop_size else 0
+             padw = self.crop_size - ow if ow < self.crop_size else 0
+             img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
+             mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=self.fill)
+         # random crop crop_size
+         w, h = img.size
+         x1 = random.randint(0, w - self.crop_size)
+         y1 = random.randint(0, h - self.crop_size)
+         img = img.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))
+         mask = mask.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))
+
+         return {'image': img,
+                 'label': mask}
+
+
+ class FixScaleCrop(object):
+     def __init__(self, crop_size):
+         self.crop_size = crop_size
+
+     def __call__(self, sample):
+         img = sample['image']
+         mask = sample['label']
+         w, h = img.size
+         if w > h:
+             oh = self.crop_size
+             ow = int(1.0 * w * oh / h)
+         else:
+             ow = self.crop_size
+             oh = int(1.0 * h * ow / w)
+         img = img.resize((ow, oh), Image.BILINEAR)
+         mask = mask.resize((ow, oh), Image.NEAREST)
+         # center crop
+         w, h = img.size
+         x1 = int(round((w - self.crop_size) / 2.))
+         y1 = int(round((h - self.crop_size) / 2.))
+         img = img.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))
+         mask = mask.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))
+
+         return {'image': img,
+                 'label': mask}
+
+ class FixedResize(object):
+     def __init__(self, size):
+         self.size = (size, size)  # size: (h, w)
+
+     def __call__(self, sample):
+         img = sample['image']
+         mask = sample['label']
+
+         assert img.size == mask.size
+
+         img = img.resize(self.size, Image.BILINEAR)
+         mask = mask.resize(self.size, Image.NEAREST)
+
+         return {'image': img,
+                 'label': mask}
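
A small sketch (not part of the commit) exercising the pipeline on a dummy sample, mirroring the transform_tr composition used by the dataset classes below:

    import numpy as np
    from PIL import Image
    from torchvision import transforms
    from dataloaders import custom_transforms as tr

    img = Image.fromarray(np.random.randint(0, 256, (600, 800, 3), dtype=np.uint8))
    mask = Image.fromarray(np.zeros((600, 800), dtype=np.uint8))

    pipeline = transforms.Compose([
        tr.RandomHorizontalFlip(),
        tr.RandomScaleCrop(base_size=513, crop_size=513),
        tr.RandomGaussianBlur(),
        tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        tr.ToTensor()])

    sample = pipeline({'image': img, 'label': mask})
    print(sample['image'].shape)  # torch.Size([3, 513, 513])
    print(sample['label'].shape)  # torch.Size([513, 513])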
dataloaders/datasets/__init__.py ADDED
File without changes
dataloaders/datasets/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (171 Bytes).
 
dataloaders/datasets/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (154 Bytes).
 
dataloaders/datasets/__pycache__/cityscapes.cpython-310.pyc ADDED
Binary file (5.28 kB).
 
dataloaders/datasets/__pycache__/cityscapes.cpython-38.pyc ADDED
Binary file (5.43 kB).
 
dataloaders/datasets/__pycache__/coco.cpython-310.pyc ADDED
Binary file (5.38 kB).
 
dataloaders/datasets/__pycache__/coco.cpython-38.pyc ADDED
Binary file (5.4 kB).
 
dataloaders/datasets/__pycache__/combine_dbs.cpython-310.pyc ADDED
Binary file (3.19 kB).
 
dataloaders/datasets/__pycache__/combine_dbs.cpython-38.pyc ADDED
Binary file (3.17 kB).
 
dataloaders/datasets/__pycache__/invoice.cpython-310.pyc ADDED
Binary file (4.35 kB).
 
dataloaders/datasets/__pycache__/invoice.cpython-38.pyc ADDED
Binary file (4.31 kB).
 
dataloaders/datasets/__pycache__/pascal.cpython-310.pyc ADDED
Binary file (4.35 kB).
 
dataloaders/datasets/__pycache__/pascal.cpython-38.pyc ADDED
Binary file (4.31 kB).
 
dataloaders/datasets/__pycache__/sbd.cpython-310.pyc ADDED
Binary file (4.01 kB).
 
dataloaders/datasets/__pycache__/sbd.cpython-38.pyc ADDED
Binary file (3.97 kB).
 
dataloaders/datasets/cityscapes.py ADDED
@@ -0,0 +1,146 @@
+ import os
+ import numpy as np
+ import scipy.misc as m
+ from PIL import Image
+ from torch.utils import data
+ from mypath import Path
+ from torchvision import transforms
+ from dataloaders import custom_transforms as tr
+
+ class CityscapesSegmentation(data.Dataset):
+     NUM_CLASSES = 19
+
+     def __init__(self, args, root=Path.db_root_dir('cityscapes'), split="train"):
+
+         self.root = root
+         self.split = split
+         self.args = args
+         self.files = {}
+
+         self.images_base = os.path.join(self.root, 'leftImg8bit', self.split)
+         self.annotations_base = os.path.join(self.root, 'gtFine_trainvaltest', 'gtFine', self.split)
+
+         self.files[split] = self.recursive_glob(rootdir=self.images_base, suffix='.png')
+
+         self.void_classes = [0, 1, 2, 3, 4, 5, 6, 9, 10, 14, 15, 16, 18, 29, 30, -1]
+         self.valid_classes = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33]
+         self.class_names = ['unlabelled', 'road', 'sidewalk', 'building', 'wall', 'fence',
+                             'pole', 'traffic_light', 'traffic_sign', 'vegetation', 'terrain',
+                             'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train',
+                             'motorcycle', 'bicycle']
+
+         self.ignore_index = 255
+         self.class_map = dict(zip(self.valid_classes, range(self.NUM_CLASSES)))
+
+         if not self.files[split]:
+             raise Exception("No files for split=[%s] found in %s" % (split, self.images_base))
+
+         print("Found %d %s images" % (len(self.files[split]), split))
+
+     def __len__(self):
+         return len(self.files[self.split])
+
+     def __getitem__(self, index):
+
+         img_path = self.files[self.split][index].rstrip()
+         lbl_path = os.path.join(self.annotations_base,
+                                 img_path.split(os.sep)[-2],
+                                 os.path.basename(img_path)[:-15] + 'gtFine_labelIds.png')
+
+         _img = Image.open(img_path).convert('RGB')
+         _tmp = np.array(Image.open(lbl_path), dtype=np.uint8)
+         _tmp = self.encode_segmap(_tmp)
+         _target = Image.fromarray(_tmp)
+
+         sample = {'image': _img, 'label': _target}
+
+         if self.split == 'train':
+             return self.transform_tr(sample)
+         elif self.split == 'val':
+             return self.transform_val(sample)
+         elif self.split == 'test':
+             return self.transform_ts(sample)
+
+     def encode_segmap(self, mask):
+         # Map all void classes to the ignore index and valid classes to train ids
+         for _voidc in self.void_classes:
+             mask[mask == _voidc] = self.ignore_index
+         for _validc in self.valid_classes:
+             mask[mask == _validc] = self.class_map[_validc]
+         return mask
+
+     def recursive_glob(self, rootdir='.', suffix=''):
+         """Performs recursive glob with given suffix and rootdir
+         :param rootdir is the root directory
+         :param suffix is the suffix to be searched
+         """
+         return [os.path.join(looproot, filename)
+                 for looproot, _, filenames in os.walk(rootdir)
+                 for filename in filenames if filename.endswith(suffix)]
+
+     def transform_tr(self, sample):
+         composed_transforms = transforms.Compose([
+             tr.RandomHorizontalFlip(),
+             tr.RandomScaleCrop(base_size=self.args.base_size, crop_size=self.args.crop_size, fill=255),
+             tr.RandomGaussianBlur(),
+             tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             tr.ToTensor()])
+
+         return composed_transforms(sample)
+
+     def transform_val(self, sample):
+
+         composed_transforms = transforms.Compose([
+             tr.FixScaleCrop(crop_size=self.args.crop_size),
+             tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             tr.ToTensor()])
+
+         return composed_transforms(sample)
+
+     def transform_ts(self, sample):
+
+         composed_transforms = transforms.Compose([
+             tr.FixedResize(size=self.args.crop_size),
+             tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             tr.ToTensor()])
+
+         return composed_transforms(sample)
+
+ if __name__ == '__main__':
+     from dataloaders.utils import decode_segmap
+     from torch.utils.data import DataLoader
+     import matplotlib.pyplot as plt
+     import argparse
+
+     parser = argparse.ArgumentParser()
+     args = parser.parse_args()
+     args.base_size = 513
+     args.crop_size = 513
+
+     cityscapes_train = CityscapesSegmentation(args, split='train')
+
+     dataloader = DataLoader(cityscapes_train, batch_size=2, shuffle=True, num_workers=2)
+
+     for ii, sample in enumerate(dataloader):
+         for jj in range(sample["image"].size()[0]):
+             img = sample['image'].numpy()
+             gt = sample['label'].numpy()
+             tmp = np.array(gt[jj]).astype(np.uint8)
+             segmap = decode_segmap(tmp, dataset='cityscapes')
+             img_tmp = np.transpose(img[jj], axes=[1, 2, 0])
+             img_tmp *= (0.229, 0.224, 0.225)
+             img_tmp += (0.485, 0.456, 0.406)
+             img_tmp *= 255.0
+             img_tmp = img_tmp.astype(np.uint8)
+             plt.figure()
+             plt.title('display')
+             plt.subplot(211)
+             plt.imshow(img_tmp)
+             plt.subplot(212)
+             plt.imshow(segmap)
+
+         if ii == 1:
+             break
+
+     plt.show(block=True)
dataloaders/datasets/coco.py ADDED
@@ -0,0 +1,160 @@
+ import numpy as np
+ import torch
+ from torch.utils.data import Dataset
+ from mypath import Path
+ from tqdm import trange
+ import os
+ from pycocotools.coco import COCO
+ from pycocotools import mask
+ from torchvision import transforms
+ from dataloaders import custom_transforms as tr
+ from PIL import Image, ImageFile
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
+
+
+ class COCOSegmentation(Dataset):
+     NUM_CLASSES = 21
+     CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4,
+                 1, 64, 20, 63, 7, 72]
+
+     def __init__(self,
+                  args,
+                  base_dir=Path.db_root_dir('coco'),
+                  split='train',
+                  year='2017'):
+         super().__init__()
+         ann_file = os.path.join(base_dir, 'annotations/instances_{}{}.json'.format(split, year))
+         ids_file = os.path.join(base_dir, 'annotations/{}_ids_{}.pth'.format(split, year))
+         self.img_dir = os.path.join(base_dir, 'images/{}{}'.format(split, year))
+         self.split = split
+         self.coco = COCO(ann_file)
+         self.coco_mask = mask
+         if os.path.exists(ids_file):
+             self.ids = torch.load(ids_file)
+         else:
+             ids = list(self.coco.imgs.keys())
+             self.ids = self._preprocess(ids, ids_file)
+         self.args = args
+
+     def __getitem__(self, index):
+         _img, _target = self._make_img_gt_point_pair(index)
+         sample = {'image': _img, 'label': _target}
+
+         if self.split == "train":
+             return self.transform_tr(sample)
+         elif self.split == 'val':
+             return self.transform_val(sample)
+
+     def _make_img_gt_point_pair(self, index):
+         coco = self.coco
+         img_id = self.ids[index]
+         img_metadata = coco.loadImgs(img_id)[0]
+         path = img_metadata['file_name']
+         _img = Image.open(os.path.join(self.img_dir, path)).convert('RGB')
+         cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
+         _target = Image.fromarray(self._gen_seg_mask(
+             cocotarget, img_metadata['height'], img_metadata['width']))
+
+         return _img, _target
+
+     def _preprocess(self, ids, ids_file):
+         print("Preprocessing masks, this will take a while. "
+               "But don't worry, it only runs once for each split.")
+         tbar = trange(len(ids))
+         new_ids = []
+         for i in tbar:
+             img_id = ids[i]
+             cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
+             img_metadata = self.coco.loadImgs(img_id)[0]
+             mask = self._gen_seg_mask(cocotarget, img_metadata['height'],
+                                       img_metadata['width'])
+             # keep only images whose mask covers more than 1k pixels
+             if (mask > 0).sum() > 1000:
+                 new_ids.append(img_id)
+             tbar.set_description('Doing: {}/{}, got {} qualified images'.
+                                  format(i, len(ids), len(new_ids)))
+         print('Found number of qualified images: ', len(new_ids))
+         torch.save(new_ids, ids_file)
+         return new_ids
+
+     def _gen_seg_mask(self, target, h, w):
+         mask = np.zeros((h, w), dtype=np.uint8)
+         coco_mask = self.coco_mask
+         for instance in target:
+             rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
+             m = coco_mask.decode(rle)
+             cat = instance['category_id']
+             if cat in self.CAT_LIST:
+                 c = self.CAT_LIST.index(cat)
+             else:
+                 continue
+             if len(m.shape) < 3:
+                 mask[:, :] += (mask == 0) * (m * c)
+             else:
+                 mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8)
+         return mask
+
+     def transform_tr(self, sample):
+         composed_transforms = transforms.Compose([
+             tr.RandomHorizontalFlip(),
+             tr.RandomScaleCrop(base_size=self.args.base_size, crop_size=self.args.crop_size),
+             tr.RandomGaussianBlur(),
+             tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             tr.ToTensor()])
+
+         return composed_transforms(sample)
+
+     def transform_val(self, sample):
+
+         composed_transforms = transforms.Compose([
+             tr.FixScaleCrop(crop_size=self.args.crop_size),
+             tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             tr.ToTensor()])
+
+         return composed_transforms(sample)
+
+     def __len__(self):
+         return len(self.ids)
+
+
+ if __name__ == "__main__":
+     from dataloaders import custom_transforms as tr
+     from dataloaders.utils import decode_segmap
+     from torch.utils.data import DataLoader
+     from torchvision import transforms
+     import matplotlib.pyplot as plt
+     import argparse
+
+     parser = argparse.ArgumentParser()
+     args = parser.parse_args()
+     args.base_size = 513
+     args.crop_size = 513
+
+     coco_val = COCOSegmentation(args, split='val', year='2017')
+
+     dataloader = DataLoader(coco_val, batch_size=4, shuffle=True, num_workers=0)
+
+     for ii, sample in enumerate(dataloader):
+         for jj in range(sample["image"].size()[0]):
+             img = sample['image'].numpy()
+             gt = sample['label'].numpy()
+             tmp = np.array(gt[jj]).astype(np.uint8)
+             segmap = decode_segmap(tmp, dataset='coco')
+             img_tmp = np.transpose(img[jj], axes=[1, 2, 0])
+             img_tmp *= (0.229, 0.224, 0.225)
+             img_tmp += (0.485, 0.456, 0.406)
+             img_tmp *= 255.0
+             img_tmp = img_tmp.astype(np.uint8)
+             plt.figure()
+             plt.title('display')
+             plt.subplot(211)
+             plt.imshow(img_tmp)
+             plt.subplot(212)
+             plt.imshow(segmap)
+
+         if ii == 1:
+             break
+
+     plt.show(block=True)
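
The two pycocotools calls that _gen_seg_mask relies on can be tried on a toy polygon (hypothetical data, not from the dataset): frPyObjects converts a polygon segmentation to RLE, and decode rasterizes the RLE into a binary mask.

    import numpy as np
    from pycocotools import mask as coco_mask

    h, w = 8, 8
    polygon = [[1.0, 1.0, 6.0, 1.0, 6.0, 6.0, 1.0, 6.0]]  # a square as [x0, y0, x1, y1, ...]
    rle = coco_mask.frPyObjects(polygon, h, w)
    m = coco_mask.decode(rle)  # shape (8, 8, 1): one channel per polygon
    print(m[..., 0].sum())     # number of pixels inside the square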
dataloaders/datasets/combine_dbs.py ADDED
@@ -0,0 +1,100 @@
+ import torch.utils.data as data
+
+
+ class CombineDBs(data.Dataset):
+     NUM_CLASSES = 21
+     def __init__(self, dataloaders, excluded=None):
+         self.dataloaders = dataloaders
+         self.excluded = excluded
+         self.im_ids = []
+
+         # Combine object lists
+         for dl in dataloaders:
+             for elem in dl.im_ids:
+                 if elem not in self.im_ids:
+                     self.im_ids.append(elem)
+
+         # Exclude image ids that appear in the excluded datasets
+         if excluded:
+             for dl in excluded:
+                 for elem in dl.im_ids:
+                     if elem in self.im_ids:
+                         self.im_ids.remove(elem)
+
+         # Get object pointers
+         self.cat_list = []
+         self.im_list = []
+         new_im_ids = []
+         num_images = 0
+         for ii, dl in enumerate(dataloaders):
+             for jj, curr_im_id in enumerate(dl.im_ids):
+                 if (curr_im_id in self.im_ids) and (curr_im_id not in new_im_ids):
+                     num_images += 1
+                     new_im_ids.append(curr_im_id)
+                     self.cat_list.append({'db_ii': ii, 'cat_ii': jj})
+
+         self.im_ids = new_im_ids
+         print('Combined number of images: {:d}'.format(num_images))
+
+     def __getitem__(self, index):
+
+         _db_ii = self.cat_list[index]["db_ii"]
+         _cat_ii = self.cat_list[index]['cat_ii']
+         sample = self.dataloaders[_db_ii].__getitem__(_cat_ii)
+
+         if 'meta' in sample.keys():
+             sample['meta']['db'] = str(self.dataloaders[_db_ii])
+
+         return sample
+
+     def __len__(self):
+         return len(self.cat_list)
+
+     def __str__(self):
+         include_db = [str(db) for db in self.dataloaders]
+         exclude_db = [str(db) for db in self.excluded]
+         return 'Included datasets:' + str(include_db) + '\n' + 'Excluded datasets:' + str(exclude_db)
+
+
+ if __name__ == "__main__":
+     import matplotlib.pyplot as plt
+     from dataloaders.datasets import pascal, sbd
+     import torch
+     import numpy as np
+     from dataloaders.utils import decode_segmap
+     import argparse
+
+     parser = argparse.ArgumentParser()
+     args = parser.parse_args()
+     args.base_size = 513
+     args.crop_size = 513
+
+     pascal_voc_val = pascal.VOCSegmentation(args, split='val')
+     sbd = sbd.SBDSegmentation(args, split=['train', 'val'])
+     pascal_voc_train = pascal.VOCSegmentation(args, split='train')
+
+     dataset = CombineDBs([pascal_voc_train, sbd], excluded=[pascal_voc_val])
+     dataloader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True, num_workers=0)
+
+     for ii, sample in enumerate(dataloader):
+         for jj in range(sample["image"].size()[0]):
+             img = sample['image'].numpy()
+             gt = sample['label'].numpy()
+             tmp = np.array(gt[jj]).astype(np.uint8)
+             segmap = decode_segmap(tmp, dataset='pascal')
+             img_tmp = np.transpose(img[jj], axes=[1, 2, 0])
+             img_tmp *= (0.229, 0.224, 0.225)
+             img_tmp += (0.485, 0.456, 0.406)
+             img_tmp *= 255.0
+             img_tmp = img_tmp.astype(np.uint8)
+             plt.figure()
+             plt.title('display')
+             plt.subplot(211)
+             plt.imshow(img_tmp)
+             plt.subplot(212)
+             plt.imshow(segmap)
+
+         if ii == 1:
+             break
+     plt.show(block=True)
dataloaders/datasets/invoice.py ADDED
@@ -0,0 +1,145 @@
+ from __future__ import print_function, division
+ import os
+ from PIL import Image
+ import numpy as np
+ from torch.utils.data import Dataset
+ from mypath import Path
+ from torchvision import transforms
+ from dataloaders import custom_transforms as tr
+
+ class VOCSegmentation(Dataset):
+     """
+     Invoice dataset in Pascal VOC format
+     """
+     NUM_CLASSES = 2
+
+     def __init__(self,
+                  args,
+                  base_dir=Path.db_root_dir('invoice'),
+                  split='train',
+                  ):
+         """
+         :param base_dir: path to the VOC-style dataset directory
+         :param split: train/val
+         :param transform: transform to apply
+         """
+         super().__init__()
+         self._base_dir = base_dir
+         self._image_dir = os.path.join(self._base_dir, 'JPEGImages')
+         self._cat_dir = os.path.join(self._base_dir, 'SegmentationClass')
+
+         if isinstance(split, str):
+             self.split = [split]
+         else:
+             split.sort()
+             self.split = split
+
+         self.args = args
+
+         _splits_dir = os.path.join(self._base_dir, 'ImageSets', 'Segmentation')
+
+         self.im_ids = []
+         self.images = []
+         self.categories = []
+
+         for splt in self.split:
+             with open(os.path.join(_splits_dir, splt + '.txt'), "r") as f:
+                 lines = f.read().splitlines()
+
+             for ii, line in enumerate(lines):
+                 _image = os.path.join(self._image_dir, line + ".png")
+                 _cat = os.path.join(self._cat_dir, line + ".png")
+                 assert os.path.isfile(_image)
+                 assert os.path.isfile(_cat)
+                 self.im_ids.append(line)
+                 self.images.append(_image)
+                 self.categories.append(_cat)
+
+         assert (len(self.images) == len(self.categories))
+
+         # Display stats
+         print('Number of images in {}: {:d}'.format(split, len(self.images)))
+
+     def __len__(self):
+         return len(self.images)
+
+     def __getitem__(self, index):
+         _img, _target = self._make_img_gt_point_pair(index)
+         sample = {'image': _img, 'label': _target}
+
+         for split in self.split:
+             if split == "train":
+                 return self.transform_tr(sample)
+             elif split == 'val':
+                 return self.transform_val(sample)
+
+     def _make_img_gt_point_pair(self, index):
+         _img = Image.open(self.images[index]).convert('RGB')
+         _target = Image.open(self.categories[index])
+
+         return _img, _target
+
+     def transform_tr(self, sample):
+         composed_transforms = transforms.Compose([
+             tr.RandomHorizontalFlip(),
+             tr.RandomScaleCrop(base_size=self.args.base_size, crop_size=self.args.crop_size),
+             tr.RandomGaussianBlur(),
+             tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             tr.ToTensor()])
+
+         return composed_transforms(sample)
+
+     def transform_val(self, sample):
+
+         composed_transforms = transforms.Compose([
+             tr.FixScaleCrop(crop_size=self.args.crop_size),
+             tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             tr.ToTensor()])
+
+         return composed_transforms(sample)
+
+     def __str__(self):
+         return 'VOC2012(split=' + str(self.split) + ')'
+
+
+ if __name__ == '__main__':
+     from dataloaders.utils import decode_segmap
+     from torch.utils.data import DataLoader
+     import matplotlib.pyplot as plt
+     import argparse
+
+     parser = argparse.ArgumentParser()
+     args = parser.parse_args()
+     args.base_size = 512
+     args.crop_size = 512
+
+     voc_train = VOCSegmentation(args, split='train')
+
+     dataloader = DataLoader(voc_train, batch_size=5, shuffle=True, num_workers=0)
+
+     for ii, sample in enumerate(dataloader):
+         for jj in range(sample["image"].size()[0]):
+             img = sample['image'].numpy()
+             gt = sample['label'].numpy()
+             tmp = np.array(gt[jj]).astype(np.uint8)
+             segmap = decode_segmap(tmp, dataset='invoice')
+             img_tmp = np.transpose(img[jj], axes=[1, 2, 0])
+             img_tmp *= (0.229, 0.224, 0.225)
+             img_tmp += (0.485, 0.456, 0.406)
+             img_tmp *= 255.0
+             img_tmp = img_tmp.astype(np.uint8)
+             plt.figure()
+             plt.title('display')
+             plt.subplot(211)
+             plt.imshow(img_tmp)
+             plt.subplot(212)
+             plt.imshow(segmap)
+
+         if ii == 1:
+             break
+
+     plt.show(block=True)
dataloaders/datasets/pascal.py ADDED
@@ -0,0 +1,145 @@
+ from __future__ import print_function, division
+ import os
+ from PIL import Image
+ import numpy as np
+ from torch.utils.data import Dataset
+ from mypath import Path
+ from torchvision import transforms
+ from dataloaders import custom_transforms as tr
+
+ class VOCSegmentation(Dataset):
+     """
+     PascalVoc dataset
+     """
+     NUM_CLASSES = 21
+
+     def __init__(self,
+                  args,
+                  base_dir=Path.db_root_dir('pascal'),
+                  split='train',
+                  ):
+         """
+         :param base_dir: path to VOC dataset directory
+         :param split: train/val
+         :param transform: transform to apply
+         """
+         super().__init__()
+         self._base_dir = base_dir
+         self._image_dir = os.path.join(self._base_dir, 'JPEGImages')
+         self._cat_dir = os.path.join(self._base_dir, 'SegmentationClass')
+
+         if isinstance(split, str):
+             self.split = [split]
+         else:
+             split.sort()
+             self.split = split
+
+         self.args = args
+
+         _splits_dir = os.path.join(self._base_dir, 'ImageSets', 'Segmentation')
+
+         self.im_ids = []
+         self.images = []
+         self.categories = []
+
+         for splt in self.split:
+             with open(os.path.join(_splits_dir, splt + '.txt'), "r") as f:
+                 lines = f.read().splitlines()
+
+             for ii, line in enumerate(lines):
+                 _image = os.path.join(self._image_dir, line + ".jpg")
+                 _cat = os.path.join(self._cat_dir, line + ".png")
+                 assert os.path.isfile(_image)
+                 assert os.path.isfile(_cat)
+                 self.im_ids.append(line)
+                 self.images.append(_image)
+                 self.categories.append(_cat)
+
+         assert (len(self.images) == len(self.categories))
+
+         # Display stats
+         print('Number of images in {}: {:d}'.format(split, len(self.images)))
+
+     def __len__(self):
+         return len(self.images)
+
+     def __getitem__(self, index):
+         _img, _target = self._make_img_gt_point_pair(index)
+         sample = {'image': _img, 'label': _target}
+
+         for split in self.split:
+             if split == "train":
+                 return self.transform_tr(sample)
+             elif split == 'val':
+                 return self.transform_val(sample)
+
+     def _make_img_gt_point_pair(self, index):
+         _img = Image.open(self.images[index]).convert('RGB')
+         _target = Image.open(self.categories[index])
+
+         return _img, _target
+
+     def transform_tr(self, sample):
+         composed_transforms = transforms.Compose([
+             tr.RandomHorizontalFlip(),
+             tr.RandomScaleCrop(base_size=self.args.base_size, crop_size=self.args.crop_size),
+             tr.RandomGaussianBlur(),
+             tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             tr.ToTensor()])
+
+         return composed_transforms(sample)
+
+     def transform_val(self, sample):
+
+         composed_transforms = transforms.Compose([
+             tr.FixScaleCrop(crop_size=self.args.crop_size),
+             tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             tr.ToTensor()])
+
+         return composed_transforms(sample)
+
+     def __str__(self):
+         return 'VOC2012(split=' + str(self.split) + ')'
+
+
+ if __name__ == '__main__':
+     from dataloaders.utils import decode_segmap
+     from torch.utils.data import DataLoader
+     import matplotlib.pyplot as plt
+     import argparse
+
+     parser = argparse.ArgumentParser()
+     args = parser.parse_args()
+     args.base_size = 513
+     args.crop_size = 513
+
+     voc_train = VOCSegmentation(args, split='train')
+
+     dataloader = DataLoader(voc_train, batch_size=5, shuffle=True, num_workers=0)
+
+     for ii, sample in enumerate(dataloader):
+         for jj in range(sample["image"].size()[0]):
+             img = sample['image'].numpy()
+             gt = sample['label'].numpy()
+             tmp = np.array(gt[jj]).astype(np.uint8)
+             segmap = decode_segmap(tmp, dataset='pascal')
+             img_tmp = np.transpose(img[jj], axes=[1, 2, 0])
+             img_tmp *= (0.229, 0.224, 0.225)
+             img_tmp += (0.485, 0.456, 0.406)
+             img_tmp *= 255.0
+             img_tmp = img_tmp.astype(np.uint8)
+             plt.figure()
+             plt.title('display')
+             plt.subplot(211)
+             plt.imshow(img_tmp)
+             plt.subplot(212)
+             plt.imshow(segmap)
+
+         if ii == 1:
+             break
+
+     plt.show(block=True)
dataloaders/datasets/sbd.py ADDED
@@ -0,0 +1,129 @@
+ from __future__ import print_function, division
+ import os
+
+ import numpy as np
+ import scipy.io
+ import torch.utils.data as data
+ from PIL import Image
+ from mypath import Path
+
+ from torchvision import transforms
+ from dataloaders import custom_transforms as tr
+
+ class SBDSegmentation(data.Dataset):
+     NUM_CLASSES = 21
+
+     def __init__(self,
+                  args,
+                  base_dir=Path.db_root_dir('sbd'),
+                  split='train',
+                  ):
+         """
+         :param base_dir: path to SBD dataset directory
+         :param split: train/val
+         :param transform: transform to apply
+         """
+         super().__init__()
+         self._base_dir = base_dir
+         self._dataset_dir = os.path.join(self._base_dir, 'dataset')
+         self._image_dir = os.path.join(self._dataset_dir, 'img')
+         self._cat_dir = os.path.join(self._dataset_dir, 'cls')
+
+         if isinstance(split, str):
+             self.split = [split]
+         else:
+             split.sort()
+             self.split = split
+
+         self.args = args
+
+         # Get list of all images from the split and check that the files exist
+         self.im_ids = []
+         self.images = []
+         self.categories = []
+         for splt in self.split:
+             with open(os.path.join(self._dataset_dir, splt + '.txt'), "r") as f:
+                 lines = f.read().splitlines()
+
+             for line in lines:
+                 _image = os.path.join(self._image_dir, line + ".jpg")
+                 _categ = os.path.join(self._cat_dir, line + ".mat")
+                 assert os.path.isfile(_image)
+                 assert os.path.isfile(_categ)
+                 self.im_ids.append(line)
+                 self.images.append(_image)
+                 self.categories.append(_categ)
+
+         assert (len(self.images) == len(self.categories))
+
+         # Display stats
+         print('Number of images: {:d}'.format(len(self.images)))
+
+     def __getitem__(self, index):
+         _img, _target = self._make_img_gt_point_pair(index)
+         sample = {'image': _img, 'label': _target}
+
+         return self.transform(sample)
+
+     def __len__(self):
+         return len(self.images)
+
+     def _make_img_gt_point_pair(self, index):
+         _img = Image.open(self.images[index]).convert('RGB')
+         _target = Image.fromarray(scipy.io.loadmat(self.categories[index])["GTcls"][0]['Segmentation'][0])
+
+         return _img, _target
+
+     def transform(self, sample):
+         composed_transforms = transforms.Compose([
+             tr.RandomHorizontalFlip(),
+             tr.RandomScaleCrop(base_size=self.args.base_size, crop_size=self.args.crop_size),
+             tr.RandomGaussianBlur(),
+             tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+             tr.ToTensor()])
+
+         return composed_transforms(sample)
+
+     def __str__(self):
+         return 'SBDSegmentation(split=' + str(self.split) + ')'
+
+
+ if __name__ == '__main__':
+     from dataloaders.utils import decode_segmap
+     from torch.utils.data import DataLoader
+     import matplotlib.pyplot as plt
+     import argparse
+
+     parser = argparse.ArgumentParser()
+     args = parser.parse_args()
+     args.base_size = 513
+     args.crop_size = 513
+
+     sbd_train = SBDSegmentation(args, split='train')
+     dataloader = DataLoader(sbd_train, batch_size=2, shuffle=True, num_workers=2)
+
+     for ii, sample in enumerate(dataloader):
+         for jj in range(sample["image"].size()[0]):
+             img = sample['image'].numpy()
+             gt = sample['label'].numpy()
+             tmp = np.array(gt[jj]).astype(np.uint8)
+             segmap = decode_segmap(tmp, dataset='pascal')
+             img_tmp = np.transpose(img[jj], axes=[1, 2, 0])
+             img_tmp *= (0.229, 0.224, 0.225)
+             img_tmp += (0.485, 0.456, 0.406)
+             img_tmp *= 255.0
+             img_tmp = img_tmp.astype(np.uint8)
+             plt.figure()
+             plt.title('display')
+             plt.subplot(211)
+             plt.imshow(img_tmp)
+             plt.subplot(212)
+             plt.imshow(segmap)
+
+         if ii == 1:
+             break
+
+     plt.show(block=True)
dataloaders/utils.py ADDED
@@ -0,0 +1,111 @@
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import torch
+
+ def decode_seg_map_sequence(label_masks, dataset='pascal'):
+     rgb_masks = []
+     for label_mask in label_masks:
+         rgb_mask = decode_segmap(label_mask, dataset)
+         rgb_masks.append(rgb_mask)
+     rgb_masks = torch.from_numpy(np.array(rgb_masks).transpose([0, 3, 1, 2]))
+     return rgb_masks
+
+
+ def decode_segmap(label_mask, dataset, plot=False):
+     """Decode segmentation class labels into a color image
+     Args:
+         label_mask (np.ndarray): an (M,N) array of integer values denoting
+           the class label at each spatial location.
+         dataset (str): one of 'pascal', 'coco', 'cityscapes', or 'invoice',
+           selecting the label-color mapping.
+         plot (bool, optional): whether to show the resulting color image
+           in a figure.
+     Returns:
+         (np.ndarray, optional): the resulting decoded color image.
+     """
+     if dataset == 'pascal' or dataset == 'coco':
+         n_classes = 21
+         label_colours = get_pascal_labels()
+     elif dataset == 'cityscapes':
+         n_classes = 19
+         label_colours = get_cityscapes_labels()
+     elif dataset == 'invoice':
+         n_classes = 2
+         label_colours = get_invoice_labels()
+     else:
+         raise NotImplementedError
+
+     r = label_mask.copy()
+     g = label_mask.copy()
+     b = label_mask.copy()
+     for ll in range(0, n_classes):
+         r[label_mask == ll] = label_colours[ll, 0]
+         g[label_mask == ll] = label_colours[ll, 1]
+         b[label_mask == ll] = label_colours[ll, 2]
+     rgb = np.zeros((label_mask.shape[0], label_mask.shape[1], 3))
+     rgb[:, :, 0] = r / 255.0
+     rgb[:, :, 1] = g / 255.0
+     rgb[:, :, 2] = b / 255.0
+     if plot:
+         plt.imshow(rgb)
+         plt.show()
+     else:
+         return rgb
+
+
+ def encode_segmap(mask):
+     """Encode segmentation label images as pascal classes
+     Args:
+         mask (np.ndarray): raw segmentation label image of dimension
+           (M, N, 3), in which the Pascal classes are encoded as colours.
+     Returns:
+         (np.ndarray): class map with dimensions (M,N), where the value at
+         a given location is the integer denoting the class index.
+     """
+     mask = mask.astype(int)
+     label_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.int16)
+     for ii, label in enumerate(get_pascal_labels()):
+         label_mask[np.where(np.all(mask == label, axis=-1))[:2]] = ii
+     label_mask = label_mask.astype(int)
+     return label_mask
+
+
+ def get_cityscapes_labels():
+     return np.array([
+         [128, 64, 128],
+         [244, 35, 232],
+         [70, 70, 70],
+         [102, 102, 156],
+         [190, 153, 153],
+         [153, 153, 153],
+         [250, 170, 30],
+         [220, 220, 0],
+         [107, 142, 35],
+         [152, 251, 152],
+         [0, 130, 180],
+         [220, 20, 60],
+         [255, 0, 0],
+         [0, 0, 142],
+         [0, 0, 70],
+         [0, 60, 100],
+         [0, 80, 100],
+         [0, 0, 230],
+         [119, 11, 32]])
+
+
+ def get_pascal_labels():
+     """Load the mapping that associates pascal classes with label colors
+     Returns:
+         np.ndarray with dimensions (21, 3)
+     """
+     return np.asarray([[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
+                        [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128],
+                        [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0],
+                        [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128],
+                        [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0],
+                        [0, 64, 128]])
+
+ def get_invoice_labels():
+     """Load the mapping that associates invoice classes with label colors
+     Returns:
+         np.ndarray with dimensions (2, 3)
+     """
+     return np.asarray([[0, 0, 0], [255, 255, 255]])
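
A tiny sketch (not part of the commit) of decode_segmap on the two-class invoice mapping: class 0 decodes to black, class 1 to white, and the output is scaled to [0, 1].

    import numpy as np
    from dataloaders.utils import decode_segmap

    label_mask = np.array([[0, 1],
                           [1, 0]])
    rgb = decode_segmap(label_mask, dataset='invoice')
    print(rgb[0, 0], rgb[0, 1])  # [0. 0. 0.] [1. 1. 1.]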
deeplab-mobilenet.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a36ba48f39fc6edc161335211b15d9250cadb521f1cb958cb6d014399093f31
+ size 46666796
deeplab-resnet.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c1ca4610f1ff8c118b451aa0ab30048554a9e77b794f7174808c457e935913a
+ size 474903453
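
The two .pth.tar entries above are Git LFS pointer files: the repository stores only the version, oid, and size fields, while the real weights live in LFS storage. A small sketch (not part of the commit) parsing that format:

    def parse_lfs_pointer(text):
        # each pointer line is "<key> <value>"
        fields = {}
        for line in text.strip().splitlines():
            key, _, value = line.strip().partition(' ')
            fields[key] = value
        return fields

    pointer = (
        "version https://git-lfs.github.com/spec/v1\n"
        "oid sha256:3c1ca4610f1ff8c118b451aa0ab30048554a9e77b794f7174808c457e935913a\n"
        "size 474903453"
    )
    info = parse_lfs_pointer(pointer)
    print(info['size'])  # '474903453'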
deeplab_demo.py ADDED
@@ -0,0 +1,111 @@
+ # Description: This script is used to extract the specified category from the image using the trained DeepLabV3+ model.
+ # file name: deeplab_demo.py
+
+ import argparse
+ import time
+
+ import numpy as np
+ import torch
+ from PIL import Image
+ from torchvision import transforms
+ from torchvision.utils import make_grid, save_image
+ from torchvision.transforms import ToTensor, ToPILImage
+
+ from modeling.deeplab import *
+ from dataloaders import custom_transforms as tr
+ from dataloaders.utils import *
+
+ def get_people(newimage):
+     # define the argument parser for configuring the model
+     parser = argparse.ArgumentParser(description="PyTorch DeeplabV3Plus Training")
+     parser.add_argument('--in-path', type=str, default="img", help='image to test')
+     # parser.add_argument('--out-path', type=str, required=True, help='mask image to save')
+     parser.add_argument('--backbone', type=str, default='mobilenet',
+                         choices=['resnet', 'xception', 'drn', 'mobilenet'],
+                         help='backbone name (default: mobilenet)')
+     parser.add_argument('--ckpt', type=str, default='deeplab-mobilenet.pth.tar',
+                         help='saved model')
+     parser.add_argument('--out-stride', type=int, default=8,
+                         help='network output stride (default: 8)')
+     parser.add_argument('--no-cuda', action='store_true', default=False,
+                         help='disables CUDA training')
+     parser.add_argument('--gpu-ids', type=str, default='0',
+                         help='use which gpu to train, must be a \
+                         comma-separated list of integers only (default=0)')
+     parser.add_argument('--dataset', type=str, default='invoice',
+                         choices=['pascal', 'coco', 'cityscapes', 'invoice'],
+                         help='dataset name (default: invoice)')
+     parser.add_argument('--crop-size', type=int, default=512,
+                         help='crop image size')
+     parser.add_argument('--num_classes', type=int, default=21,
+                         help='number of classes (default: 21)')
+     parser.add_argument('--sync-bn', type=bool, default=None,
+                         help='whether to use sync bn (default: auto)')
+     parser.add_argument('--freeze-bn', type=bool, default=False,
+                         help='whether to freeze bn parameters (default: False)')
+     args = parser.parse_args()
+     args.cuda = not args.no_cuda and torch.cuda.is_available()
+     if args.cuda:
+         try:
+             args.gpu_ids = [int(s) for s in args.gpu_ids.split(',')]
+         except ValueError:
+             raise ValueError('Argument --gpu_ids must be a comma-separated list of integers only')
+
+     if args.sync_bn is None:
+         if args.cuda and len(args.gpu_ids) > 1:
+             args.sync_bn = True
+         else:
+             args.sync_bn = False
+     model_s_time = time.time()
+     model = DeepLab(num_classes=args.num_classes,
+                     backbone=args.backbone,
+                     output_stride=args.out_stride,
+                     sync_bn=args.sync_bn,
+                     freeze_bn=args.freeze_bn)
+
+     ckpt = torch.load(args.ckpt, map_location='cpu')
+     model.load_state_dict(ckpt['state_dict'])
+     # model = model.cuda()
+     model_u_time = time.time()
+     model_load_time = model_u_time - model_s_time
+     print("model load time is {}".format(model_load_time))
+
+     composed_transforms = transforms.Compose([
+         tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+         tr.ToTensor()])
+
+     image = newimage
+     s_time = time.time()
+     # the custom transforms expect an {'image', 'label'} pair; reuse the input as a dummy label
+     target = newimage
+     sample = {'image': image, 'label': target}
+     tensor_in = composed_transforms(sample)['image'].unsqueeze(0)
+
+     model.eval()
+     if args.cuda:
+         tensor_in = tensor_in.cuda()
+     with torch.no_grad():
+         output = model(tensor_in)
+
+     # Get the per-pixel class index
+     pred = torch.max(output, 1)[1].detach().cpu().numpy()
+     # Specify the category label to extract
+     target_class = 15  # 15 = "person" in Pascal VOC; replace with the category index you want to extract
+     mask = (pred == target_class).astype(np.uint8).squeeze()
+     # Apply the mask to the original image
+     image_np = np.array(image)
+     masked_image = image_np * mask[:, :, np.newaxis]
+
+     # save the masked area
+     masked_image_pil = Image.fromarray(masked_image)
+     grid_image = make_grid(decode_seg_map_sequence(torch.max(output[:3], 1)[1].detach().cpu().numpy()),
+                            3, normalize=False)
+     u_time = time.time()
+     img_time = u_time - s_time
+     print("time: {} ".format(img_time))
+
+     return masked_image_pil, grid_image
+
+ # mypath = r'img/people.jpg'
+ # image = Image.open(mypath).convert('RGB')
+ # result, mask = get_people(image)
+ # result_tensor = ToTensor()(result)
+ # save_image(result_tensor, "masked.png")
+
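
A standalone usage sketch of get_people, completing the commented-out lines above; it assumes img/people.jpg and the deeplab-mobilenet.pth.tar checkpoint from this commit are present in the working directory:

# Sketch: run the extractor end-to-end and save both outputs.
from PIL import Image
from torchvision.transforms import ToTensor
from torchvision.utils import save_image
from deeplab_demo import get_people

image = Image.open('img/people.jpg').convert('RGB')
masked, grid = get_people(image)            # masked person (PIL) + segmentation grid (tensor)
save_image(ToTensor()(masked), 'img/masked.png')
save_image(grid, 'img/mask.png')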
end.py ADDED
@@ -0,0 +1,90 @@
+ # filename: end.py
+ # Description: This is the main file of the project. It is used to create the Gradio interface and run the application.
+
+ import gradio as gr
+ from PIL import Image
+ from deeplab_demo import get_people
+ from creat_anaglyph import insert_person_to_stereo_gradio
+ import torch
+ from torchvision.transforms import ToPILImage
+
+ # Process the person image and return the masked person plus the raw segmentation grid
+ def process_person_image(person_image):
+     masked_image_pil, grid_image = get_people(person_image)
+
+     if isinstance(masked_image_pil, torch.Tensor):
+         masked_image_pil = ToPILImage()(masked_image_pil)
+     if isinstance(grid_image, torch.Tensor):
+         grid_image = ToPILImage()(grid_image)
+
+     return masked_image_pil, grid_image
+
+ # Generate the anaglyph image
+ def generate_anaglyph(masked_image_pil, scenery_image, depth_option, custom_disparity):
+     # Default disparities for the non-custom options: close, medium, far
+     depth_disparities = {
+         "close": 10,  # Adjust values as needed
+         "medium": 5,
+         "far": 2
+     }
+
+     # Use custom_disparity only if depth_option is "custom"
+     disparity = custom_disparity if depth_option == "custom" else depth_disparities.get(depth_option, 5)
+
+     # Ensure inputs are PIL images
+     if isinstance(masked_image_pil, torch.Tensor):
+         masked_image_pil = ToPILImage()(masked_image_pil)
+     if isinstance(scenery_image, torch.Tensor):
+         scenery_image = ToPILImage()(scenery_image)
+
+     anaglyph_image = insert_person_to_stereo_gradio(scenery_image, masked_image_pil, disparity)
+
+     if isinstance(anaglyph_image, torch.Tensor):
+         anaglyph_image = ToPILImage()(anaglyph_image)
+
+     return anaglyph_image
+
+ # Create Gradio interface
+ with gr.Blocks() as iface:
+     with gr.Row():
+         person_image_input = gr.Image(type="pil", label="Character image")
+         scenery_image_input = gr.Image(type="pil", label="Landscape image")
+         depth_option_input = gr.Dropdown(choices=["close", "medium", "far", "custom"], label="Depth Options")
+         custom_disparity_input = gr.Slider(minimum=0, maximum=50, step=1, label="Custom Depth Disparity", visible=False)
+
+     with gr.Row():
+         grid_image_output = gr.Image(type="pil", label="Grid", interactive=False)
+         masked_image_output = gr.Image(type="pil", label="Masked", interactive=False)
+         anaglyph_image_output = gr.Image(type="pil", label="Anaglyph", interactive=False)
+
+     # button 1: process the person image
+     process_button = gr.Button("Process person image")
+     process_button.click(
+         fn=process_person_image,
+         inputs=person_image_input,
+         outputs=[masked_image_output, grid_image_output]
+     )
+
+     # Show the custom-disparity slider only when the "custom" depth option is selected
+     def update_custom_slider_visibility(depth_option):
+         return gr.update(visible=(depth_option == "custom"))
+
+     depth_option_input.change(
+         fn=update_custom_slider_visibility,
+         inputs=[depth_option_input],
+         outputs=custom_disparity_input
+     )
+
+     # button 2: generate the anaglyph image
+     generate_button = gr.Button("Generate Anaglyph Image")
+     generate_button.click(
+         fn=generate_anaglyph,
+         inputs=[masked_image_output, scenery_image_input, depth_option_input, custom_disparity_input],
+         outputs=anaglyph_image_output
+     )
+
+ # Launch the Gradio interface
+ # changed from iface.launch()
+ iface.launch(share=True)
+
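
The disparity passed to insert_person_to_stereo_gradio controls how far apart the two eye views are shifted, which is what makes the inserted person read as close (10), medium (5), or far (2). creat_anaglyph.py is not shown in this view, so the following is only an illustrative standalone sketch of the red/cyan principle, not the project's actual implementation:

# Sketch: red/cyan anaglyph composition (NOT the code in creat_anaglyph.py).
# A larger horizontal shift between the two eye views reads as "closer".
import numpy as np
from PIL import Image

def toy_anaglyph(scene: Image.Image, disparity: int) -> Image.Image:
    arr = np.array(scene.convert('RGB'))
    left = np.roll(arr, disparity, axis=1)  # shifted view for the left eye
    out = arr.copy()
    out[:, :, 0] = left[:, :, 0]            # red channel from the left view
    return Image.fromarray(out)             # green/blue stay from the right view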
img/mask.png ADDED
img/masked.png ADDED
img/people.jpg ADDED
img/scenery.jpg ADDED

Git LFS Details

  • SHA256: 0567943542c1b4f1e8b272d6eb2e7ec4b4bf4605375d7d0b0e49f43a7065e552
  • Pointer size: 132 Bytes
  • Size of remote file: 2.96 MB
img/scenery2.jpg ADDED

Git LFS Details

  • SHA256: a943acacd8426172c90b7ca380c6b9ef8ef50d53d78a2f7c16a34ae6d14a067e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.97 MB
modeling/__init__.py ADDED
File without changes
modeling/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (159 Bytes). View file
 
modeling/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (142 Bytes). View file
 
modeling/__pycache__/aspp.cpython-310.pyc ADDED
Binary file (3.08 kB). View file