koichi12 commited on
Commit
208efc9
·
verified ·
1 Parent(s): 04b7ba0

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .venv/lib/python3.11/site-packages/torchvision/datasets/__init__.py +146 -0
  2. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/_optical_flow.cpython-311.pyc +0 -0
  3. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/_stereo_matching.cpython-311.pyc +0 -0
  4. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/celeba.cpython-311.pyc +0 -0
  5. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/cifar.cpython-311.pyc +0 -0
  6. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/cityscapes.cpython-311.pyc +0 -0
  7. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/clevr.cpython-311.pyc +0 -0
  8. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/dtd.cpython-311.pyc +0 -0
  9. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/eurosat.cpython-311.pyc +0 -0
  10. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/fer2013.cpython-311.pyc +0 -0
  11. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/fgvc_aircraft.cpython-311.pyc +0 -0
  12. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/flowers102.cpython-311.pyc +0 -0
  13. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/folder.cpython-311.pyc +0 -0
  14. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/food101.cpython-311.pyc +0 -0
  15. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/gtsrb.cpython-311.pyc +0 -0
  16. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/imagenette.cpython-311.pyc +0 -0
  17. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/inaturalist.cpython-311.pyc +0 -0
  18. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/kitti.cpython-311.pyc +0 -0
  19. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/lfw.cpython-311.pyc +0 -0
  20. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/lsun.cpython-311.pyc +0 -0
  21. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/moving_mnist.cpython-311.pyc +0 -0
  22. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/omniglot.cpython-311.pyc +0 -0
  23. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/oxford_iiit_pet.cpython-311.pyc +0 -0
  24. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/pcam.cpython-311.pyc +0 -0
  25. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/phototour.cpython-311.pyc +0 -0
  26. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/places365.cpython-311.pyc +0 -0
  27. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/rendered_sst2.cpython-311.pyc +0 -0
  28. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/sbd.cpython-311.pyc +0 -0
  29. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/semeion.cpython-311.pyc +0 -0
  30. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/stanford_cars.cpython-311.pyc +0 -0
  31. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/stl10.cpython-311.pyc +0 -0
  32. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/svhn.cpython-311.pyc +0 -0
  33. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/ucf101.cpython-311.pyc +0 -0
  34. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/usps.cpython-311.pyc +0 -0
  35. .venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/vision.cpython-311.pyc +0 -0
  36. .venv/lib/python3.11/site-packages/torchvision/datasets/_stereo_matching.py +1224 -0
  37. .venv/lib/python3.11/site-packages/torchvision/datasets/caltech.py +242 -0
  38. .venv/lib/python3.11/site-packages/torchvision/datasets/celeba.py +194 -0
  39. .venv/lib/python3.11/site-packages/torchvision/datasets/cifar.py +168 -0
  40. .venv/lib/python3.11/site-packages/torchvision/datasets/cityscapes.py +222 -0
  41. .venv/lib/python3.11/site-packages/torchvision/datasets/clevr.py +88 -0
  42. .venv/lib/python3.11/site-packages/torchvision/datasets/coco.py +109 -0
  43. .venv/lib/python3.11/site-packages/torchvision/datasets/dtd.py +100 -0
  44. .venv/lib/python3.11/site-packages/torchvision/datasets/eurosat.py +62 -0
  45. .venv/lib/python3.11/site-packages/torchvision/datasets/fakedata.py +67 -0
  46. .venv/lib/python3.11/site-packages/torchvision/datasets/fer2013.py +120 -0
  47. .venv/lib/python3.11/site-packages/torchvision/datasets/fgvc_aircraft.py +115 -0
  48. .venv/lib/python3.11/site-packages/torchvision/datasets/flickr.py +167 -0
  49. .venv/lib/python3.11/site-packages/torchvision/datasets/flowers102.py +114 -0
  50. .venv/lib/python3.11/site-packages/torchvision/datasets/food101.py +93 -0
.venv/lib/python3.11/site-packages/torchvision/datasets/__init__.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ._optical_flow import FlyingChairs, FlyingThings3D, HD1K, KittiFlow, Sintel
2
+ from ._stereo_matching import (
3
+ CarlaStereo,
4
+ CREStereo,
5
+ ETH3DStereo,
6
+ FallingThingsStereo,
7
+ InStereo2k,
8
+ Kitti2012Stereo,
9
+ Kitti2015Stereo,
10
+ Middlebury2014Stereo,
11
+ SceneFlowStereo,
12
+ SintelStereo,
13
+ )
14
+ from .caltech import Caltech101, Caltech256
15
+ from .celeba import CelebA
16
+ from .cifar import CIFAR10, CIFAR100
17
+ from .cityscapes import Cityscapes
18
+ from .clevr import CLEVRClassification
19
+ from .coco import CocoCaptions, CocoDetection
20
+ from .country211 import Country211
21
+ from .dtd import DTD
22
+ from .eurosat import EuroSAT
23
+ from .fakedata import FakeData
24
+ from .fer2013 import FER2013
25
+ from .fgvc_aircraft import FGVCAircraft
26
+ from .flickr import Flickr30k, Flickr8k
27
+ from .flowers102 import Flowers102
28
+ from .folder import DatasetFolder, ImageFolder
29
+ from .food101 import Food101
30
+ from .gtsrb import GTSRB
31
+ from .hmdb51 import HMDB51
32
+ from .imagenet import ImageNet
33
+ from .imagenette import Imagenette
34
+ from .inaturalist import INaturalist
35
+ from .kinetics import Kinetics
36
+ from .kitti import Kitti
37
+ from .lfw import LFWPairs, LFWPeople
38
+ from .lsun import LSUN, LSUNClass
39
+ from .mnist import EMNIST, FashionMNIST, KMNIST, MNIST, QMNIST
40
+ from .moving_mnist import MovingMNIST
41
+ from .omniglot import Omniglot
42
+ from .oxford_iiit_pet import OxfordIIITPet
43
+ from .pcam import PCAM
44
+ from .phototour import PhotoTour
45
+ from .places365 import Places365
46
+ from .rendered_sst2 import RenderedSST2
47
+ from .sbd import SBDataset
48
+ from .sbu import SBU
49
+ from .semeion import SEMEION
50
+ from .stanford_cars import StanfordCars
51
+ from .stl10 import STL10
52
+ from .sun397 import SUN397
53
+ from .svhn import SVHN
54
+ from .ucf101 import UCF101
55
+ from .usps import USPS
56
+ from .vision import VisionDataset
57
+ from .voc import VOCDetection, VOCSegmentation
58
+ from .widerface import WIDERFace
59
+
60
# Public API of ``torchvision.datasets``: the names exported by
# ``from torchvision.datasets import *``.  Note that
# ``wrap_dataset_for_transforms_v2`` is not imported above — it is resolved
# lazily by the module-level ``__getattr__`` below.
__all__ = (
    "LSUN",
    "LSUNClass",
    "ImageFolder",
    "DatasetFolder",
    "FakeData",
    "CocoCaptions",
    "CocoDetection",
    "CIFAR10",
    "CIFAR100",
    "EMNIST",
    "FashionMNIST",
    "QMNIST",
    "MNIST",
    "KMNIST",
    "StanfordCars",
    "STL10",
    "SUN397",
    "SVHN",
    "PhotoTour",
    "SEMEION",
    "Omniglot",
    "SBU",
    "Flickr8k",
    "Flickr30k",
    "Flowers102",
    "VOCSegmentation",
    "VOCDetection",
    "Cityscapes",
    "ImageNet",
    "Caltech101",
    "Caltech256",
    "CelebA",
    "WIDERFace",
    "SBDataset",
    "VisionDataset",
    "USPS",
    "Kinetics",
    "HMDB51",
    "UCF101",
    "Places365",
    "Kitti",
    "INaturalist",
    "LFWPeople",
    "LFWPairs",
    "KittiFlow",
    "Sintel",
    "FlyingChairs",
    "FlyingThings3D",
    "HD1K",
    "Food101",
    "DTD",
    "FER2013",
    "GTSRB",
    "CLEVRClassification",
    "OxfordIIITPet",
    "PCAM",
    "Country211",
    "FGVCAircraft",
    "EuroSAT",
    "RenderedSST2",
    "Kitti2012Stereo",
    "Kitti2015Stereo",
    "CarlaStereo",
    "Middlebury2014Stereo",
    "CREStereo",
    "FallingThingsStereo",
    "SceneFlowStereo",
    "SintelStereo",
    "InStereo2k",
    "ETH3DStereo",
    "wrap_dataset_for_transforms_v2",
    "Imagenette",
)
134
+
135
+
136
# Lazily resolve ``wrap_dataset_for_transforms_v2`` on module attribute
# access so that
#   from torchvision.datasets import wrap_dataset_for_transforms_v2
# works without a cyclic import against ``torchvision.tv_tensors``.
# Ref: https://peps.python.org/pep-0562/
def __getattr__(name):
    if name == "wrap_dataset_for_transforms_v2":
        # Deferred import: pulling this in at module load time would
        # create an import cycle.
        from torchvision.tv_tensors._dataset_wrapper import wrap_dataset_for_transforms_v2

        return wrap_dataset_for_transforms_v2

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/_optical_flow.cpython-311.pyc ADDED
Binary file (27.8 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/_stereo_matching.cpython-311.pyc ADDED
Binary file (59.3 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/celeba.cpython-311.pyc ADDED
Binary file (12 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/cifar.cpython-311.pyc ADDED
Binary file (9.05 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/cityscapes.cpython-311.pyc ADDED
Binary file (14.3 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/clevr.cpython-311.pyc ADDED
Binary file (6.58 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/dtd.cpython-311.pyc ADDED
Binary file (7.21 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/eurosat.cpython-311.pyc ADDED
Binary file (4 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/fer2013.cpython-311.pyc ADDED
Binary file (7.66 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/fgvc_aircraft.cpython-311.pyc ADDED
Binary file (7.66 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/flowers102.cpython-311.pyc ADDED
Binary file (7.26 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/folder.cpython-311.pyc ADDED
Binary file (17.4 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/food101.cpython-311.pyc ADDED
Binary file (6.97 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/gtsrb.cpython-311.pyc ADDED
Binary file (6.04 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/imagenette.cpython-311.pyc ADDED
Binary file (7.02 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/inaturalist.cpython-311.pyc ADDED
Binary file (14.3 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/kitti.cpython-311.pyc ADDED
Binary file (9.38 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/lfw.cpython-311.pyc ADDED
Binary file (18.8 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/lsun.cpython-311.pyc ADDED
Binary file (10.3 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/moving_mnist.cpython-311.pyc ADDED
Binary file (6.03 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/omniglot.cpython-311.pyc ADDED
Binary file (7.32 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/oxford_iiit_pet.cpython-311.pyc ADDED
Binary file (10 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/pcam.cpython-311.pyc ADDED
Binary file (7.86 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/phototour.cpython-311.pyc ADDED
Binary file (13.4 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/places365.cpython-311.pyc ADDED
Binary file (12.3 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/rendered_sst2.cpython-311.pyc ADDED
Binary file (5.83 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/sbd.cpython-311.pyc ADDED
Binary file (9.53 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/semeion.cpython-311.pyc ADDED
Binary file (5.17 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/stanford_cars.cpython-311.pyc ADDED
Binary file (6.91 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/stl10.cpython-311.pyc ADDED
Binary file (11.9 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/svhn.cpython-311.pyc ADDED
Binary file (6.68 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/ucf101.cpython-311.pyc ADDED
Binary file (8.49 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/usps.cpython-311.pyc ADDED
Binary file (6.15 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/__pycache__/vision.cpython-311.pyc ADDED
Binary file (7.58 kB). View file
 
.venv/lib/python3.11/site-packages/torchvision/datasets/_stereo_matching.py ADDED
@@ -0,0 +1,1224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import json
3
+ import os
4
+ import random
5
+ import shutil
6
+ from abc import ABC, abstractmethod
7
+ from glob import glob
8
+ from pathlib import Path
9
+ from typing import Callable, cast, List, Optional, Tuple, Union
10
+
11
+ import numpy as np
12
+ from PIL import Image
13
+
14
+ from .utils import _read_pfm, download_and_extract_archive, verify_str_arg
15
+ from .vision import VisionDataset
16
+
17
# Sample type including a validity mask:
# (left image, right image, disparity map, valid mask).
T1 = Tuple[Image.Image, Image.Image, Optional[np.ndarray], np.ndarray]
# Sample type without a validity mask:
# (left image, right image, disparity map).
T2 = Tuple[Image.Image, Image.Image, Optional[np.ndarray]]

# Nothing is public here; the dataset classes are re-exported by
# ``torchvision.datasets.__init__``.
__all__ = ()

# Reader for ``.pfm`` disparity files that keeps a single channel.
_read_pfm_file = functools.partial(_read_pfm, slice_channels=1)
23
+
24
+
25
class StereoMatchingDataset(ABC, VisionDataset):
    """Base interface for Stereo matching datasets"""

    # Subclasses whose samples always carry a validity mask set this to True;
    # it forces __getitem__ to return the 4-tuple variant.
    _has_built_in_disparity_mask = False

    def __init__(self, root: Union[str, Path], transforms: Optional[Callable] = None) -> None:
        """
        Args:
            root(str): Root directory of the dataset.
            transforms(callable, optional): A function/transform that takes in Tuples of
                (images, disparities, valid_masks) and returns a transformed version of each of them.
                images is a Tuple of (``PIL.Image``, ``PIL.Image``)
                disparities is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (1, H, W)
                valid_masks is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (H, W)
                In some cases, when a dataset does not provide disparities, the ``disparities`` and
                ``valid_masks`` can be Tuples containing None values.
                For training splits generally the datasets provide a minimal guarantee of
                images: (``PIL.Image``, ``PIL.Image``)
                disparities: (``np.ndarray``, ``None``) with shape (1, H, W)
                Optionally, based on the dataset, it can return a ``mask`` as well:
                valid_masks: (``np.ndarray | None``, ``None``) with shape (H, W)
                For some test splits, the datasets provide outputs that look like:
                images: (``PIL.Image``, ``PIL.Image``)
                disparities: (``None``, ``None``)
                Optionally, based on the dataset, it can return a ``mask`` as well:
                valid_masks: (``None``, ``None``)
        """
        super().__init__(root=root)
        self.transforms = transforms

        # Populated by subclasses with (left, right) path pairs.
        self._images = []  # type: ignore
        self._disparities = []  # type: ignore

    def _read_img(self, file_path: Union[str, Path]) -> Image.Image:
        """Open ``file_path`` and return it as an RGB ``PIL.Image``."""
        img = Image.open(file_path)
        if img.mode != "RGB":
            img = img.convert("RGB")  # type: ignore [assignment]
        return img

    def _scan_pairs(
        self,
        paths_left_pattern: str,
        paths_right_pattern: Optional[str] = None,
    ) -> List[Tuple[str, Optional[str]]]:
        """Glob both patterns and pair the sorted results positionally.

        When ``paths_right_pattern`` is falsy, the right-hand entries are
        ``None`` (used by datasets that only provide left-view files).

        Raises:
            FileNotFoundError: If a pattern matches no files.
            ValueError: If the two patterns match different numbers of files.
        """
        # sorted() already returns a list; no need to wrap it in list().
        left_paths = sorted(glob(paths_left_pattern))

        right_paths: List[Union[None, str]]
        if paths_right_pattern:
            right_paths = sorted(glob(paths_right_pattern))
        else:
            right_paths = [None] * len(left_paths)

        if not left_paths:
            raise FileNotFoundError(f"Could not find any files matching the patterns: {paths_left_pattern}")

        if not right_paths:
            raise FileNotFoundError(f"Could not find any files matching the patterns: {paths_right_pattern}")

        if len(left_paths) != len(right_paths):
            raise ValueError(
                f"Found {len(left_paths)} left files but {len(right_paths)} right files using:\n "
                f"left pattern: {paths_left_pattern}\n"
                f"right pattern: {paths_right_pattern}\n"
            )

        return list(zip(left_paths, right_paths))

    @abstractmethod
    def _read_disparity(self, file_path: str) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
        # function that returns a disparity map and an occlusion map
        pass

    def __getitem__(self, index: int) -> Union[T1, T2]:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3 or 4-tuple with ``(img_left, img_right, disparity, Optional[valid_mask])`` where ``valid_mask``
                can be a numpy boolean mask of shape (H, W) if the dataset provides a file
                indicating which disparity pixels are valid. The disparity is a numpy array of
                shape (1, H, W) and the images are PIL images. ``disparity`` is None for
                datasets on which for ``split="test"`` the authors did not provide annotations.
        """
        img_left = self._read_img(self._images[index][0])
        img_right = self._read_img(self._images[index][1])

        dsp_map_left, valid_mask_left = self._read_disparity(self._disparities[index][0])
        dsp_map_right, valid_mask_right = self._read_disparity(self._disparities[index][1])

        imgs = (img_left, img_right)
        dsp_maps = (dsp_map_left, dsp_map_right)
        valid_masks = (valid_mask_left, valid_mask_right)

        if self.transforms is not None:
            (
                imgs,
                dsp_maps,
                valid_masks,
            ) = self.transforms(imgs, dsp_maps, valid_masks)

        # Return the 4-tuple when the dataset (or a transform) supplies a mask.
        if self._has_built_in_disparity_mask or valid_masks[0] is not None:
            return imgs[0], imgs[1], dsp_maps[0], cast(np.ndarray, valid_masks[0])
        else:
            return imgs[0], imgs[1], dsp_maps[0]

    def __len__(self) -> int:
        return len(self._images)
136
+
137
+
138
class CarlaStereo(StereoMatchingDataset):
    """
    Carla simulator data linked in the `CREStereo github repo <https://github.com/megvii-research/CREStereo>`_.

    The dataset is expected to have the following structure: ::

        root
            carla-highres
                trainingF
                    scene1
                        im0.png
                        im1.png
                        disp0GT.pfm
                        disp1GT.pfm
                        calib.txt
                    scene2
                        im0.png
                        im1.png
                        disp0GT.pfm
                        disp1GT.pfm
                        calib.txt
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where `carla-highres` is located.
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    def __init__(self, root: Union[str, Path], transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        root = Path(root) / "carla-highres"

        # Images and disparities live side by side in each scene directory.
        # NOTE: file names are im0.png/im1.png (the docstring above matches).
        left_image_pattern = str(root / "trainingF" / "*" / "im0.png")
        right_image_pattern = str(root / "trainingF" / "*" / "im1.png")
        imgs = self._scan_pairs(left_image_pattern, right_image_pattern)
        self._images = imgs

        left_disparity_pattern = str(root / "trainingF" / "*" / "disp0GT.pfm")
        right_disparity_pattern = str(root / "trainingF" / "*" / "disp1GT.pfm")
        disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern)
        self._disparities = disparities

    def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
        """Read a ``.pfm`` disparity map; CARLA provides no validity mask."""
        disparity_map = _read_pfm_file(file_path)
        disparity_map = np.abs(disparity_map)  # ensure that the disparity is positive
        valid_mask = None
        return disparity_map, valid_mask

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
                The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
                If a ``valid_mask`` is generated within the ``transforms`` parameter,
                a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        """
        return cast(T1, super().__getitem__(index))
200
+
201
+
202
class Kitti2012Stereo(StereoMatchingDataset):
    """
    KITTI dataset from the `2012 stereo evaluation benchmark <http://www.cvlibs.net/datasets/kitti/eval_stereo_flow.php>`_.
    Uses the RGB images for consistency with KITTI 2015.

    Expected directory layout: ::

        root
            Kitti2012
                testing
                    colored_0
                        1_10.png
                        2_10.png
                        ...
                    colored_1
                        1_10.png
                        2_10.png
                        ...
                training
                    colored_0
                        1_10.png
                        2_10.png
                        ...
                    colored_1
                        1_10.png
                        2_10.png
                        ...
                    disp_noc
                        1.png
                        2.png
                        ...
                    calib

    Args:
        root (str or ``pathlib.Path``): Root directory where `Kitti2012` is located.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    # Samples always reserve the valid-mask slot, so __getitem__ yields 4-tuples.
    _has_built_in_disparity_mask = True

    def __init__(self, root: Union[str, Path], split: str = "train", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        verify_str_arg(split, "split", valid_values=("train", "test"))

        # The on-disk folders are named "training" / "testing".
        base = Path(root) / "Kitti2012" / (split + "ing")

        self._images = self._scan_pairs(
            str(base / "colored_0" / "*_10.png"),
            str(base / "colored_1" / "*_10.png"),
        )

        if split == "train":
            self._disparities = self._scan_pairs(str(base / "disp_noc" / "*.png"), None)
        else:
            # The test split ships no ground-truth disparities.
            self._disparities = [(None, None)] * len(self._images)

    def _read_disparity(self, file_path: str) -> Tuple[Optional[np.ndarray], None]:
        if file_path is None:
            # Test split: nothing to read.
            return None, None

        # Disparities are stored as PNGs scaled by 256; add a leading
        # channel axis to obtain the (C, H, W) layout.
        disparity = np.asarray(Image.open(file_path)) / 256.0
        return disparity[None, :, :], None

    def __getitem__(self, index: int) -> T1:
        """Fetch the sample at ``index``.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple ``(img_left, img_right, disparity, valid_mask)``.
                The images are PIL images and ``disparity`` is a numpy array of
                shape (1, H, W). ``valid_mask`` stays ``None`` unless the
                ``transforms`` parameter generates one, and both ``disparity``
                and ``valid_mask`` are ``None`` for the test split.
        """
        return cast(T1, super().__getitem__(index))
285
+
286
+
287
class Kitti2015Stereo(StereoMatchingDataset):
    """
    KITTI dataset from the `2015 stereo evaluation benchmark <http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php>`_.

    Expected directory layout: ::

        root
            Kitti2015
                testing
                    image_2
                        img1.png
                        img2.png
                        ...
                    image_3
                        img1.png
                        img2.png
                        ...
                training
                    image_2
                        img1.png
                        img2.png
                        ...
                    image_3
                        img1.png
                        img2.png
                        ...
                    disp_occ_0
                        img1.png
                        img2.png
                        ...
                    disp_occ_1
                        img1.png
                        img2.png
                        ...
                    calib

    Args:
        root (str or ``pathlib.Path``): Root directory where `Kitti2015` is located.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    # Samples always reserve the valid-mask slot, so __getitem__ yields 4-tuples.
    _has_built_in_disparity_mask = True

    def __init__(self, root: Union[str, Path], split: str = "train", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        verify_str_arg(split, "split", valid_values=("train", "test"))

        # The on-disk folders are named "training" / "testing".
        root = Path(root) / "Kitti2015" / (split + "ing")

        left_img_pattern = str(root / "image_2" / "*.png")
        right_img_pattern = str(root / "image_3" / "*.png")
        self._images = self._scan_pairs(left_img_pattern, right_img_pattern)

        if split != "train":
            # The test split ships no ground-truth disparities.
            self._disparities = [(None, None)] * len(self._images)
        else:
            self._disparities = self._scan_pairs(
                str(root / "disp_occ_0" / "*.png"),
                str(root / "disp_occ_1" / "*.png"),
            )

    def _read_disparity(self, file_path: str) -> Tuple[Optional[np.ndarray], None]:
        if file_path is None:
            # Test split: nothing to read.
            return None, None

        # Disparities are stored as PNGs scaled by 256; add a leading
        # channel axis to obtain the (C, H, W) layout.
        disparity = np.asarray(Image.open(file_path)) / 256.0
        return disparity[None, :, :], None

    def __getitem__(self, index: int) -> T1:
        """Fetch the sample at ``index``.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple ``(img_left, img_right, disparity, valid_mask)``.
                The images are PIL images and ``disparity`` is a numpy array of
                shape (1, H, W). ``valid_mask`` stays ``None`` unless the
                ``transforms`` parameter generates one, and both ``disparity``
                and ``valid_mask`` are ``None`` for the test split.
        """
        return cast(T1, super().__getitem__(index))
373
+
374
+
375
+ class Middlebury2014Stereo(StereoMatchingDataset):
376
+ """Publicly available scenes from the Middlebury dataset `2014 version <https://vision.middlebury.edu/stereo/data/scenes2014/>`.
377
+
378
+ The dataset mostly follows the original format, without containing the ambient subdirectories. : ::
379
+
380
+ root
381
+ Middlebury2014
382
+ train
383
+ scene1-{perfect,imperfect}
384
+ calib.txt
385
+ im{0,1}.png
386
+ im1E.png
387
+ im1L.png
388
+ disp{0,1}.pfm
389
+ disp{0,1}-n.png
390
+ disp{0,1}-sd.pfm
391
+ disp{0,1}y.pfm
392
+ scene2-{perfect,imperfect}
393
+ calib.txt
394
+ im{0,1}.png
395
+ im1E.png
396
+ im1L.png
397
+ disp{0,1}.pfm
398
+ disp{0,1}-n.png
399
+ disp{0,1}-sd.pfm
400
+ disp{0,1}y.pfm
401
+ ...
402
+ additional
403
+ scene1-{perfect,imperfect}
404
+ calib.txt
405
+ im{0,1}.png
406
+ im1E.png
407
+ im1L.png
408
+ disp{0,1}.pfm
409
+ disp{0,1}-n.png
410
+ disp{0,1}-sd.pfm
411
+ disp{0,1}y.pfm
412
+ ...
413
+ test
414
+ scene1
415
+ calib.txt
416
+ im{0,1}.png
417
+ scene2
418
+ calib.txt
419
+ im{0,1}.png
420
+ ...
421
+
422
+ Args:
423
+ root (str or ``pathlib.Path``): Root directory of the Middleburry 2014 Dataset.
424
+ split (string, optional): The dataset split of scenes, either "train" (default), "test", or "additional"
425
+ use_ambient_views (boolean, optional): Whether to use different expose or lightning views when possible.
426
+ The dataset samples with equal probability between ``[im1.png, im1E.png, im1L.png]``.
427
+ calibration (string, optional): Whether or not to use the calibrated (default) or uncalibrated scenes.
428
+ transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
429
+ download (boolean, optional): Whether or not to download the dataset in the ``root`` directory.
430
+ """
431
+
432
+ splits = {
433
+ "train": [
434
+ "Adirondack",
435
+ "Jadeplant",
436
+ "Motorcycle",
437
+ "Piano",
438
+ "Pipes",
439
+ "Playroom",
440
+ "Playtable",
441
+ "Recycle",
442
+ "Shelves",
443
+ "Vintage",
444
+ ],
445
+ "additional": [
446
+ "Backpack",
447
+ "Bicycle1",
448
+ "Cable",
449
+ "Classroom1",
450
+ "Couch",
451
+ "Flowers",
452
+ "Mask",
453
+ "Shopvac",
454
+ "Sticks",
455
+ "Storage",
456
+ "Sword1",
457
+ "Sword2",
458
+ "Umbrella",
459
+ ],
460
+ "test": [
461
+ "Plants",
462
+ "Classroom2E",
463
+ "Classroom2",
464
+ "Australia",
465
+ "DjembeL",
466
+ "CrusadeP",
467
+ "Crusade",
468
+ "Hoops",
469
+ "Bicycle2",
470
+ "Staircase",
471
+ "Newkuba",
472
+ "AustraliaP",
473
+ "Djembe",
474
+ "Livingroom",
475
+ "Computer",
476
+ ],
477
+ }
478
+
479
+ _has_built_in_disparity_mask = True
480
+
481
+ def __init__(
482
+ self,
483
+ root: Union[str, Path],
484
+ split: str = "train",
485
+ calibration: Optional[str] = "perfect",
486
+ use_ambient_views: bool = False,
487
+ transforms: Optional[Callable] = None,
488
+ download: bool = False,
489
+ ) -> None:
490
+ super().__init__(root, transforms)
491
+
492
+ verify_str_arg(split, "split", valid_values=("train", "test", "additional"))
493
+ self.split = split
494
+
495
+ if calibration:
496
+ verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", "both", None)) # type: ignore
497
+ if split == "test":
498
+ raise ValueError("Split 'test' has only no calibration settings, please set `calibration=None`.")
499
+ else:
500
+ if split != "test":
501
+ raise ValueError(
502
+ f"Split '{split}' has calibration settings, however None was provided as an argument."
503
+ f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.",
504
+ )
505
+
506
+ if download:
507
+ self._download_dataset(root)
508
+
509
+ root = Path(root) / "Middlebury2014"
510
+
511
+ if not os.path.exists(root / split):
512
+ raise FileNotFoundError(f"The {split} directory was not found in the provided root directory")
513
+
514
+ split_scenes = self.splits[split]
515
+ # check that the provided root folder contains the scene splits
516
+ if not any(
517
+ # using startswith to account for perfect / imperfect calibrartion
518
+ scene.startswith(s)
519
+ for scene in os.listdir(root / split)
520
+ for s in split_scenes
521
+ ):
522
+ raise FileNotFoundError(f"Provided root folder does not contain any scenes from the {split} split.")
523
+
524
+ calibrartion_suffixes = {
525
+ None: [""],
526
+ "perfect": ["-perfect"],
527
+ "imperfect": ["-imperfect"],
528
+ "both": ["-perfect", "-imperfect"],
529
+ }[calibration]
530
+
531
+ for calibration_suffix in calibrartion_suffixes:
532
+ scene_pattern = "*" + calibration_suffix
533
+ left_img_pattern = str(root / split / scene_pattern / "im0.png")
534
+ right_img_pattern = str(root / split / scene_pattern / "im1.png")
535
+ self._images += self._scan_pairs(left_img_pattern, right_img_pattern)
536
+
537
+ if split == "test":
538
+ self._disparities = list((None, None) for _ in self._images)
539
+ else:
540
+ left_dispartity_pattern = str(root / split / scene_pattern / "disp0.pfm")
541
+ right_dispartity_pattern = str(root / split / scene_pattern / "disp1.pfm")
542
+ self._disparities += self._scan_pairs(left_dispartity_pattern, right_dispartity_pattern)
543
+
544
+ self.use_ambient_views = use_ambient_views
545
+
546
+ def _read_img(self, file_path: Union[str, Path]) -> Image.Image:
547
+ """
548
+ Function that reads either the original right image or an augmented view when ``use_ambient_views`` is True.
549
+ When ``use_ambient_views`` is True, the dataset will return at random one of ``[im1.png, im1E.png, im1L.png]``
550
+ as the right image.
551
+ """
552
+ ambient_file_paths: List[Union[str, Path]] # make mypy happy
553
+
554
+ if not isinstance(file_path, Path):
555
+ file_path = Path(file_path)
556
+
557
+ if file_path.name == "im1.png" and self.use_ambient_views:
558
+ base_path = file_path.parent
559
+ # initialize sampleable container
560
+ ambient_file_paths = list(base_path / view_name for view_name in ["im1E.png", "im1L.png"])
561
+ # double check that we're not going to try to read from an invalid file path
562
+ ambient_file_paths = list(filter(lambda p: os.path.exists(p), ambient_file_paths))
563
+ # keep the original image as an option as well for uniform sampling between base views
564
+ ambient_file_paths.append(file_path)
565
+ file_path = random.choice(ambient_file_paths) # type: ignore
566
+ return super()._read_img(file_path)
567
+
568
+ def _read_disparity(self, file_path: str) -> Union[Tuple[None, None], Tuple[np.ndarray, np.ndarray]]:
569
+ # test split has not disparity maps
570
+ if file_path is None:
571
+ return None, None
572
+
573
+ disparity_map = _read_pfm_file(file_path)
574
+ disparity_map = np.abs(disparity_map) # ensure that the disparity is positive
575
+ disparity_map[disparity_map == np.inf] = 0 # remove infinite disparities
576
+ valid_mask = (disparity_map > 0).squeeze(0) # mask out invalid disparities
577
+ return disparity_map, valid_mask
578
+
579
    def _download_dataset(self, root: Union[str, Path]) -> None:
        """Download the scenes of ``self.split`` into ``root / "Middlebury2014"``.

        Train / additional scenes are fetched as one zip per (scene, calibration)
        pair; the test scenes come from the single MiddEval3 archive and are moved
        into ``root / "Middlebury2014" / "test"``.
        """
        base_url = "https://vision.middlebury.edu/stereo/data/scenes2014/zip"
        # train and additional splits have 2 different calibration settings
        root = Path(root) / "Middlebury2014"
        split_name = self.split

        if split_name != "test":
            for split_scene in self.splits[split_name]:
                split_root = root / split_name
                for calibration in ["perfect", "imperfect"]:
                    scene_name = f"{split_scene}-{calibration}"
                    scene_url = f"{base_url}/{scene_name}.zip"
                    print(f"Downloading {scene_url}")
                    # download the scene only if it doesn't exist
                    if not (split_root / scene_name).exists():
                        download_and_extract_archive(
                            url=scene_url,
                            filename=f"{scene_name}.zip",
                            download_root=str(split_root),
                            remove_finished=True,
                        )
        else:
            # NOTE(review): this raises FileExistsError when `root / "test"` already
            # exists (no exist_ok) — confirm callers always start from a fresh root.
            os.makedirs(root / "test")
            if any(s not in os.listdir(root / "test") for s in self.splits["test"]):
                # test split is downloaded from a different location
                test_set_url = "https://vision.middlebury.edu/stereo/submit3/zip/MiddEval3-data-F.zip"
                # the unzip is going to produce a directory MiddEval3 with two subdirectories trainingF and testF
                # we want to move the contents from testF into the directory
                download_and_extract_archive(url=test_set_url, download_root=str(root), remove_finished=True)
                for scene_dir, scene_names, _ in os.walk(str(root / "MiddEval3/testF")):
                    for scene in scene_names:
                        scene_dst_dir = root / "test"
                        scene_src_dir = Path(scene_dir) / scene
                        os.makedirs(scene_dst_dir, exist_ok=True)
                        shutil.move(str(scene_src_dir), str(scene_dst_dir))

                # cleanup MiddEval3 directory
                shutil.rmtree(str(root / "MiddEval3"))
617
+
618
    def __getitem__(self, index: int) -> T2:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` for ``split="test"``.
        """
        return cast(T2, super().__getitem__(index))
630
+
631
+
632
class CREStereo(StereoMatchingDataset):
    """Synthetic dataset used in training the `CREStereo <https://arxiv.org/pdf/2203.11483.pdf>`_ architecture.
    Dataset details on the official paper `repo <https://github.com/megvii-research/CREStereo>`_.

    The dataset is expected to have the following structure: ::

        root
            CREStereo
                tree
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.png
                    img1_right.disp.png
                    img2_left.jpg
                    img2_right.jpg
                    img2_left.disp.png
                    img2_right.disp.png
                    ...
                shapenet
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.png
                    img1_right.disp.png
                    ...
                reflective
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.png
                    img1_right.disp.png
                    ...
                hole
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.png
                    img1_right.disp.png
                    ...

    Args:
        root (str): Root directory of the dataset.
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    # NOTE(review): read by the StereoMatchingDataset base class — presumably marks that
    # this dataset supplies its own disparity/valid-mask handling; confirm in base class.
    _has_built_in_disparity_mask = True

    def __init__(
        self,
        root: Union[str, Path],
        transforms: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transforms)

        root = Path(root) / "CREStereo"

        # the four synthetic scene categories shipped with the dataset
        dirs = ["shapenet", "reflective", "tree", "hole"]

        for s in dirs:
            left_image_pattern = str(root / s / "*_left.jpg")
            right_image_pattern = str(root / s / "*_right.jpg")
            imgs = self._scan_pairs(left_image_pattern, right_image_pattern)
            self._images += imgs

            left_disparity_pattern = str(root / s / "*_left.disp.png")
            right_disparity_pattern = str(root / s / "*_right.disp.png")
            disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern)
            self._disparities += disparities

    def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
        """Decode a disparity PNG into a (1, H, W) float32 map (stored values are scaled by 32)."""
        disparity_map = np.asarray(Image.open(file_path), dtype=np.float32)
        # unsqueeze the disparity map into (C, H, W) format
        disparity_map = disparity_map[None, :, :] / 32.0
        valid_mask = None
        return disparity_map, valid_mask

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
            generate a valid mask.
        """
        return cast(T1, super().__getitem__(index))
718
+
719
+
720
class FallingThingsStereo(StereoMatchingDataset):
    """`FallingThings <https://research.nvidia.com/publication/2018-06_falling-things-synthetic-dataset-3d-object-detection-and-pose-estimation>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            FallingThings
                single
                    dir1
                        scene1
                            _object_settings.json
                            _camera_settings.json
                            image1.left.depth.png
                            image1.right.depth.png
                            image1.left.jpg
                            image1.right.jpg
                            image2.left.depth.png
                            image2.right.depth.png
                            image2.left.jpg
                            image2.right.jpg
                            ...
                        scene2
                    ...
                mixed
                    scene1
                        _object_settings.json
                        _camera_settings.json
                        image1.left.depth.png
                        image1.right.depth.png
                        image1.left.jpg
                        image1.right.jpg
                        image2.left.depth.png
                        image2.right.depth.png
                        image2.left.jpg
                        image2.right.jpg
                        ...
                    scene2
                ...

    Args:
        root (str or ``pathlib.Path``): Root directory where FallingThings is located.
        variant (string): Which variant to use. Either "single", "mixed", or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    def __init__(self, root: Union[str, Path], variant: str = "single", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        root = Path(root) / "FallingThings"

        verify_str_arg(variant, "variant", valid_values=("single", "mixed", "both"))

        # "both" expands to the two concrete variants
        selected_variants = ["single", "mixed"] if variant == "both" else [variant]

        # "single" nests its scenes one directory level deeper than "mixed"
        split_prefix = {
            "single": Path("*") / "*",
            "mixed": Path("*"),
        }

        for name in selected_variants:
            scene_glob = root / name / split_prefix[name]
            self._images += self._scan_pairs(str(scene_glob / "*.left.jpg"), str(scene_glob / "*.right.jpg"))
            self._disparities += self._scan_pairs(
                str(scene_glob / "*.left.depth.png"), str(scene_glob / "*.right.depth.png")
            )

    def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
        """Convert a stored (H, W) depth map into a (1, H, W) float32 disparity map."""
        depth = np.asarray(Image.open(file_path))
        # as per https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt
        # disparity is recovered by inverting depth = (baseline * focal) / (disparity * pixel_constant)
        camera_settings_path = Path(file_path).parent / "_camera_settings.json"
        with open(camera_settings_path, "r") as f:
            intrinsics = json.load(f)
        focal = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"]
        baseline, pixel_constant = 6, 100  # pixel constant is inverted
        disparity_map = (baseline * focal * pixel_constant) / depth.astype(np.float32)
        # unsqueeze disparity to (C, H, W)
        return disparity_map[None, :, :], None

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            If a ``valid_mask`` is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        """
        return cast(T1, super().__getitem__(index))
822
+
823
+
824
class SceneFlowStereo(StereoMatchingDataset):
    """Dataset interface for `Scene Flow <https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html>`_ datasets.
    This interface provides access to the ``FlyingThings3D``, ``Monkaa`` and ``Driving`` datasets.

    The dataset is expected to have the following structure: ::

        root
            SceneFlow
                Monkaa
                    frames_cleanpass
                        scene1
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                        scene2
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                    frames_finalpass
                        scene1
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                        ...
                        ...
                    disparity
                        scene1
                            left
                                img1.pfm
                                img2.pfm
                            right
                                img1.pfm
                                img2.pfm
                FlyingThings3D
                    ...
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where SceneFlow is located.
        variant (string): Which dataset variant to use, "FlyingThings3D" (default), "Monkaa" or "Driving".
        pass_name (string): Which pass to use, "clean" (default), "final" or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.

    """

    def __init__(
        self,
        root: Union[str, Path],
        variant: str = "FlyingThings3D",
        pass_name: str = "clean",
        transforms: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transforms)

        root = Path(root) / "SceneFlow"

        verify_str_arg(variant, "variant", valid_values=("FlyingThings3D", "Driving", "Monkaa"))
        verify_str_arg(pass_name, "pass_name", valid_values=("clean", "final", "both"))

        # map the user-facing pass name onto the on-disk directory names
        passes = {
            "clean": ["frames_cleanpass"],
            "final": ["frames_finalpass"],
            "both": ["frames_cleanpass", "frames_finalpass"],
        }[pass_name]

        root = root / variant

        # each variant nests its scenes at a different depth below the pass directory
        prefix_directories = {
            "Monkaa": Path("*"),
            "FlyingThings3D": Path("*") / "*" / "*",
            "Driving": Path("*") / "*" / "*",
        }

        for p in passes:
            left_image_pattern = str(root / p / prefix_directories[variant] / "left" / "*.png")
            right_image_pattern = str(root / p / prefix_directories[variant] / "right" / "*.png")
            self._images += self._scan_pairs(left_image_pattern, right_image_pattern)

            # disparities are shared between passes, so they are re-scanned per pass to
            # keep `_disparities` aligned one-to-one with `_images`
            left_disparity_pattern = str(root / "disparity" / prefix_directories[variant] / "left" / "*.pfm")
            right_disparity_pattern = str(root / "disparity" / prefix_directories[variant] / "right" / "*.pfm")
            self._disparities += self._scan_pairs(left_disparity_pattern, right_disparity_pattern)

    def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
        """Decode a PFM file into a positive disparity map; no validity mask is provided."""
        disparity_map = _read_pfm_file(file_path)
        disparity_map = np.abs(disparity_map)  # ensure that the disparity is positive
        valid_mask = None
        return disparity_map, valid_mask

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            If a ``valid_mask`` is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        """
        return cast(T1, super().__getitem__(index))
934
+
935
+
936
class SintelStereo(StereoMatchingDataset):
    """Sintel `Stereo Dataset <http://sintel.is.tue.mpg.de/stereo>`_.

    The dataset is expected to have the following structure: ::

        root
            Sintel
                training
                    final_left
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    final_right
                        scene2
                            img1.png
                            img2.png
                            ...
                        ...
                    disparities
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    occlusions
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    outofframe
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...

    Args:
        root (str or ``pathlib.Path``): Root directory where Sintel Stereo is located.
        pass_name (string): The name of the pass to use, either "final", "clean" or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    # the occlusion / out-of-frame maps shipped with the dataset are combined into a valid mask
    _has_built_in_disparity_mask = True

    def __init__(self, root: Union[str, Path], pass_name: str = "final", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        verify_str_arg(pass_name, "pass_name", valid_values=("final", "clean", "both"))

        root = Path(root) / "Sintel"
        # map the user-facing pass name onto the on-disk directory prefixes
        pass_names = {
            "final": ["final"],
            "clean": ["clean"],
            "both": ["final", "clean"],
        }[pass_name]

        for p in pass_names:
            left_img_pattern = str(root / "training" / f"{p}_left" / "*" / "*.png")
            right_img_pattern = str(root / "training" / f"{p}_right" / "*" / "*.png")
            self._images += self._scan_pairs(left_img_pattern, right_img_pattern)

            # only left-view ground truth exists; scanning per pass keeps
            # `_disparities` aligned one-to-one with `_images`
            disparity_pattern = str(root / "training" / "disparities" / "*" / "*.png")
            self._disparities += self._scan_pairs(disparity_pattern, None)

    def _get_occlussion_mask_paths(self, file_path: str) -> Tuple[str, str]:
        # helper function to get the occlusion mask paths
        # a path will look like .../.../.../training/disparities/scene1/img1.png
        # we want to get something like .../.../.../training/occlusions/scene1/img1.png
        fpath = Path(file_path)
        basename = fpath.name
        scenedir = fpath.parent
        # the parent of the scenedir is actually the disparity dir
        sampledir = scenedir.parent.parent

        occlusion_path = str(sampledir / "occlusions" / scenedir.name / basename)
        outofframe_path = str(sampledir / "outofframe" / scenedir.name / basename)

        if not os.path.exists(occlusion_path):
            raise FileNotFoundError(f"Occlusion mask {occlusion_path} does not exist")

        if not os.path.exists(outofframe_path):
            raise FileNotFoundError(f"Out of frame mask {outofframe_path} does not exist")

        return occlusion_path, outofframe_path

    def _read_disparity(self, file_path: str) -> Union[Tuple[None, None], Tuple[np.ndarray, np.ndarray]]:
        """Decode an RGB-encoded disparity PNG and build a valid mask from the occlusion maps."""
        if file_path is None:
            return None, None

        # disparity decoding as per Sintel instructions in the README provided with the dataset
        disparity_map = np.asarray(Image.open(file_path), dtype=np.float32)
        r, g, b = np.split(disparity_map, 3, axis=-1)
        disparity_map = r * 4 + g / (2**6) + b / (2**14)
        # reshape into (C, H, W) format
        disparity_map = np.transpose(disparity_map, (2, 0, 1))
        # find the appropriate file paths
        occlued_mask_path, out_of_frame_mask_path = self._get_occlussion_mask_paths(file_path)
        # occlusion masks: a zero pixel means "not occluded"
        valid_mask = np.asarray(Image.open(occlued_mask_path)) == 0
        # out of frame masks: a zero pixel means "inside the frame"
        off_mask = np.asarray(Image.open(out_of_frame_mask_path)) == 0
        # a pixel is valid only if it is neither occluded nor out of frame
        valid_mask = np.logical_and(off_mask, valid_mask)
        return disparity_map, valid_mask

    def __getitem__(self, index: int) -> T2:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images whilst
            the valid_mask is a numpy array of shape (H, W).
        """
        return cast(T2, super().__getitem__(index))
1056
+
1057
+
1058
class InStereo2k(StereoMatchingDataset):
    """`InStereo2k <https://github.com/YuhuaXu/StereoDataset>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            InStereo2k
                train
                    scene1
                        left.png
                        right.png
                        left_disp.png
                        right_disp.png
                        ...
                    scene2
                ...
                test
                    scene1
                        left.png
                        right.png
                        left_disp.png
                        right_disp.png
                        ...
                    scene2
                ...

    Args:
        root (str or ``pathlib.Path``): Root directory where InStereo2k is located.
        split (string): Either "train" or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    def __init__(self, root: Union[str, Path], split: str = "train", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        # validate the split before it is used to build any filesystem paths
        verify_str_arg(split, "split", valid_values=("train", "test"))

        root = Path(root) / "InStereo2k" / split

        left_img_pattern = str(root / "*" / "left.png")
        right_img_pattern = str(root / "*" / "right.png")
        self._images = self._scan_pairs(left_img_pattern, right_img_pattern)

        left_disparity_pattern = str(root / "*" / "left_disp.png")
        right_disparity_pattern = str(root / "*" / "right_disp.png")
        self._disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern)

    def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
        """Decode a disparity PNG into a (1, H, W) float32 map; no validity mask is provided."""
        disparity_map = np.asarray(Image.open(file_path), dtype=np.float32)
        # unsqueeze disparity to (C, H, W); stored values are scaled by 1024
        disparity_map = disparity_map[None, :, :] / 1024.0
        valid_mask = None
        return disparity_map, valid_mask

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            If a ``valid_mask`` is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        """
        return cast(T1, super().__getitem__(index))
1125
+
1126
+
1127
class ETH3DStereo(StereoMatchingDataset):
    """ETH3D `Low-Res Two-View <https://www.eth3d.net/datasets>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            ETH3D
                two_view_training
                    scene1
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    scene2
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    ...
                two_view_training_gt
                    scene1
                        disp0GT.pfm
                        mask0nocc.png
                    scene2
                        disp0GT.pfm
                        mask0nocc.png
                    ...
                two_view_test
                    scene1
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    scene2
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory of the ETH3D Dataset.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    # valid masks are derived from the dataset's own `mask0nocc.png` occlusion maps
    _has_built_in_disparity_mask = True

    def __init__(self, root: Union[str, Path], split: str = "train", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        verify_str_arg(split, "split", valid_values=("train", "test"))

        root = Path(root) / "ETH3D"

        img_dir = "two_view_training" if split == "train" else "two_view_test"
        anot_dir = "two_view_training_gt"

        left_img_pattern = str(root / img_dir / "*" / "im0.png")
        right_img_pattern = str(root / img_dir / "*" / "im1.png")
        self._images = self._scan_pairs(left_img_pattern, right_img_pattern)

        if split == "test":
            # the test split has no ground truth at all
            self._disparities = list((None, None) for _ in self._images)
        else:
            disparity_pattern = str(root / anot_dir / "*" / "disp0GT.pfm")
            self._disparities = self._scan_pairs(disparity_pattern, None)

    def _read_disparity(self, file_path: str) -> Union[Tuple[None, None], Tuple[np.ndarray, np.ndarray]]:
        """Decode a PFM disparity map and the matching non-occlusion mask."""
        # test split has no disparity maps
        if file_path is None:
            return None, None

        disparity_map = _read_pfm_file(file_path)
        disparity_map = np.abs(disparity_map)  # ensure that the disparity is positive
        # the mask lives next to the disparity file in the ground-truth directory
        mask_path = Path(file_path).parent / "mask0nocc.png"
        valid_mask = Image.open(mask_path)
        valid_mask = np.asarray(valid_mask).astype(bool)
        return disparity_map, valid_mask

    def __getitem__(self, index: int) -> T2:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
            generate a valid mask.
            Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test.
        """
        return cast(T2, super().__getitem__(index))
.venv/lib/python3.11/site-packages/torchvision/datasets/caltech.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import os.path
3
+ from pathlib import Path
4
+ from typing import Any, Callable, List, Optional, Tuple, Union
5
+
6
+ from PIL import Image
7
+
8
+ from .utils import download_and_extract_archive, verify_str_arg
9
+ from .vision import VisionDataset
10
+
11
+
12
class Caltech101(VisionDataset):
    """`Caltech 101 <https://data.caltech.edu/records/20086>`_ Dataset.

    .. warning::

        This class needs `scipy <https://docs.scipy.org/doc/>`_ to load target files from `.mat` format.

    Args:
        root (str or ``pathlib.Path``): Root directory of dataset where directory
            ``caltech101`` exists or will be saved to if download is set to True.
        target_type (string or list, optional): Type of target to use, ``category`` or
            ``annotation``. Can also be a list to output a tuple with all specified
            target types. ``category`` represents the target class, and
            ``annotation`` is a list of points from a hand-generated outline.
            Defaults to ``category``.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.

    .. warning::

        To download the dataset `gdown <https://github.com/wkentaro/gdown>`_ is required.
    """

    def __init__(
        self,
        root: Union[str, Path],
        target_type: Union[List[str], str] = "category",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super().__init__(os.path.join(root, "caltech101"), transform=transform, target_transform=target_transform)
        os.makedirs(self.root, exist_ok=True)
        # a single string is normalized to a one-element list of target types
        if isinstance(target_type, str):
            target_type = [target_type]
        self.target_type = [verify_str_arg(t, "target_type", ("category", "annotation")) for t in target_type]

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError("Dataset not found or corrupted. You can use download=True to download it")

        self.categories = sorted(os.listdir(os.path.join(self.root, "101_ObjectCategories")))
        self.categories.remove("BACKGROUND_Google")  # this is not a real class

        # For some reason, the category names in "101_ObjectCategories" and
        # "Annotations" do not always match. This is a manual map between the
        # two. Defaults to using same name, since most names are fine.
        name_map = {
            "Faces": "Faces_2",
            "Faces_easy": "Faces_3",
            "Motorbikes": "Motorbikes_16",
            "airplanes": "Airplanes_Side_2",
        }
        self.annotation_categories = list(map(lambda x: name_map[x] if x in name_map else x, self.categories))

        # `index[k]` is the 1-based image number within its category and `y[k]` the
        # category label for the k-th sample of the flattened dataset
        self.index: List[int] = []
        self.y: List[int] = []
        for (i, c) in enumerate(self.categories):
            n = len(os.listdir(os.path.join(self.root, "101_ObjectCategories", c)))
            self.index.extend(range(1, n + 1))
            self.y.extend(n * [i])

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where the type of target specified by target_type.
        """
        # imported lazily so that scipy is only required when samples are accessed
        import scipy.io

        img = Image.open(
            os.path.join(
                self.root,
                "101_ObjectCategories",
                self.categories[self.y[index]],
                f"image_{self.index[index]:04d}.jpg",
            )
        )

        # collect one target per requested target type, in the order they were given
        target: Any = []
        for t in self.target_type:
            if t == "category":
                target.append(self.y[index])
            elif t == "annotation":
                data = scipy.io.loadmat(
                    os.path.join(
                        self.root,
                        "Annotations",
                        self.annotation_categories[self.y[index]],
                        f"annotation_{self.index[index]:04d}.mat",
                    )
                )
                target.append(data["obj_contour"])
        # a single target type is returned unwrapped rather than as a 1-tuple
        target = tuple(target) if len(target) > 1 else target[0]

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def _check_integrity(self) -> bool:
        # can be more robust and check hash of files
        return os.path.exists(os.path.join(self.root, "101_ObjectCategories"))

    def __len__(self) -> int:
        return len(self.index)

    def download(self) -> None:
        """Download and extract the images and annotations archives from Google Drive."""
        if self._check_integrity():
            print("Files already downloaded and verified")
            return

        download_and_extract_archive(
            "https://drive.google.com/file/d/137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp",
            self.root,
            filename="101_ObjectCategories.tar.gz",
            md5="b224c7392d521a49829488ab0f1120d9",
        )
        download_and_extract_archive(
            "https://drive.google.com/file/d/175kQy3UsZ0wUEHZjqkUDdNVssr7bgh_m",
            self.root,
            filename="Annotations.tar",
            md5="6f83eeb1f24d99cab4eb377263132c91",
        )

    def extra_repr(self) -> str:
        return "Target type: {target_type}".format(**self.__dict__)
151
+
152
+
153
class Caltech256(VisionDataset):
    """`Caltech 256 <https://data.caltech.edu/records/20087>`_ Dataset.

    Args:
        root (str or ``pathlib.Path``): Root directory of dataset where directory
            ``caltech256`` exists or will be saved to if download is set to True.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super().__init__(os.path.join(root, "caltech256"), transform=transform, target_transform=target_transform)
        os.makedirs(self.root, exist_ok=True)

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError("Dataset not found or corrupted. You can use download=True to download it")

        self.categories = sorted(os.listdir(os.path.join(self.root, "256_ObjectCategories")))
        # Flat sample table: self.index holds the per-category 1-based image number,
        # self.y the matching category label — one entry each per image.
        self.index: List[int] = []
        self.y = []
        for label, category in enumerate(self.categories):
            category_dir = os.path.join(self.root, "256_ObjectCategories", category)
            num_images = len([entry for entry in os.listdir(category_dir) if entry.endswith(".jpg")])
            self.index.extend(range(1, num_images + 1))
            self.y.extend(num_images * [label])

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        label = self.y[index]
        # File names look like "<category number>_<image number>.jpg", both 1-based.
        img = Image.open(
            os.path.join(
                self.root,
                "256_ObjectCategories",
                self.categories[label],
                f"{label + 1:03d}_{self.index[index]:04d}.jpg",
            )
        )

        target = label
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def _check_integrity(self) -> bool:
        """Return True when the extracted image directory is present (no hash check)."""
        return os.path.exists(os.path.join(self.root, "256_ObjectCategories"))

    def __len__(self) -> int:
        """Total number of images across all categories."""
        return len(self.index)

    def download(self) -> None:
        """Download and extract the image archive into ``self.root`` (no-op when present)."""
        if self._check_integrity():
            print("Files already downloaded and verified")
            return

        download_and_extract_archive(
            "https://drive.google.com/file/d/1r6o0pSROcV1_VwT4oSjA2FBUSCWGuxLK",
            self.root,
            filename="256_ObjectCategories.tar",
            md5="67b4f42ca05d46448c6bb8ecd2220f6d",
        )
.venv/lib/python3.11/site-packages/torchvision/datasets/celeba.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import os
3
+ from collections import namedtuple
4
+ from pathlib import Path
5
+ from typing import Any, Callable, List, Optional, Tuple, Union
6
+
7
+ import PIL
8
+ import torch
9
+
10
+ from .utils import check_integrity, download_file_from_google_drive, extract_archive, verify_str_arg
11
+ from .vision import VisionDataset
12
+
13
# Parsed annotation file: column names (empty when headerless), the per-row
# image-file index, and the numeric payload as a tensor.
CSV = namedtuple("CSV", "header index data")
14
+
15
+
16
class CelebA(VisionDataset):
    """`Large-scale CelebFaces Attributes (CelebA) Dataset <http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html>`_ Dataset.

    Args:
        root (str or ``pathlib.Path``): Root directory where images are downloaded to.
        split (string): One of {'train', 'valid', 'test', 'all'}.
            Accordingly dataset is selected.
        target_type (string or list, optional): Type of target to use, ``attr``, ``identity``, ``bbox``,
            or ``landmarks``. Can also be a list to output a tuple with all specified target types.
            The targets represent:

                - ``attr`` (Tensor shape=(40,) dtype=int): binary (0, 1) labels for attributes
                - ``identity`` (int): label for each person (data points with the same identity are the same person)
                - ``bbox`` (Tensor shape=(4,) dtype=int): bounding box (x, y, width, height)
                - ``landmarks`` (Tensor shape=(10,) dtype=int): landmark points (lefteye_x, lefteye_y, righteye_x,
                  righteye_y, nose_x, nose_y, leftmouth_x, leftmouth_y, rightmouth_x, rightmouth_y)

            Defaults to ``attr``. If empty, ``None`` will be returned as target.

        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.PILToTensor``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.

    .. warning::

        To download the dataset `gdown <https://github.com/wkentaro/gdown>`_ is required.
    """

    base_folder = "celeba"
    # There currently does not appear to be an easy way to extract 7z in python (without introducing additional
    # dependencies). The "in-the-wild" (not aligned+cropped) images are only in 7z, so they are not available
    # right now.
    file_list = [
        # File ID                                  MD5 Hash                            Filename
        ("0B7EVK8r0v71pZjFTYXZWM3FlRnM", "00d2c5bc6d35e252742224ab0c1e8fcb", "img_align_celeba.zip"),
        # ("0B7EVK8r0v71pbWNEUjJKdDQ3dGc","b6cd7e93bc7a96c2dc33f819aa3ac651", "img_align_celeba_png.7z"),
        # ("0B7EVK8r0v71peklHb0pGdDl6R28", "b6cd7e93bc7a96c2dc33f819aa3ac651", "img_celeba.7z"),
        ("0B7EVK8r0v71pblRyaVFSWGxPY0U", "75e246fa4810816ffd6ee81facbd244c", "list_attr_celeba.txt"),
        ("1_ee_0u7vcNLOfNLegJRHmolfH5ICW-XS", "32bd1bd63d3c78cd57e08160ec5ed1e2", "identity_CelebA.txt"),
        ("0B7EVK8r0v71pbThiMVRxWXZ4dU0", "00566efa6fedff7a56946cd1c10f1c16", "list_bbox_celeba.txt"),
        ("0B7EVK8r0v71pd0FJY3Blby1HUTQ", "cc24ecafdb5b50baae59b03474781f8c", "list_landmarks_align_celeba.txt"),
        # ("0B7EVK8r0v71pTzJIdlJWdHczRlU", "063ee6ddb681f96bc9ca28c6febb9d1a", "list_landmarks_celeba.txt"),
        ("0B7EVK8r0v71pY0NSMzRuSXJEVkk", "d32c9cbf5e040fd4025c592c306e6668", "list_eval_partition.txt"),
    ]

    def __init__(
        self,
        root: Union[str, Path],
        split: str = "train",
        target_type: Union[List[str], str] = "attr",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super().__init__(root, transform=transform, target_transform=target_transform)
        self.split = split
        # Normalize target_type to a list so __getitem__ can iterate uniformly.
        if isinstance(target_type, list):
            self.target_type = target_type
        else:
            self.target_type = [target_type]

        if not self.target_type and self.target_transform is not None:
            raise RuntimeError("target_transform is specified but target_type is empty")

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError("Dataset not found or corrupted. You can use download=True to download it")

        # Partition codes used by list_eval_partition.txt; None selects every row.
        split_map = {
            "train": 0,
            "valid": 1,
            "test": 2,
            "all": None,
        }
        split_ = split_map[verify_str_arg(split.lower(), "split", ("train", "valid", "test", "all"))]
        splits = self._load_csv("list_eval_partition.txt")
        identity = self._load_csv("identity_CelebA.txt")
        bbox = self._load_csv("list_bbox_celeba.txt", header=1)
        landmarks_align = self._load_csv("list_landmarks_align_celeba.txt", header=1)
        attr = self._load_csv("list_attr_celeba.txt", header=1)

        # mask is either a full slice (split == "all") or a boolean tensor over rows.
        mask = slice(None) if split_ is None else (splits.data == split_).squeeze()

        # NOTE(review): comparing a bool Tensor against a slice relies on
        # Tensor.__eq__ falling back to False for non-tensor operands — confirm
        # this holds for the torch versions supported here.
        if mask == slice(None):  # if split == "all"
            self.filename = splits.index
        else:
            self.filename = [splits.index[i] for i in torch.squeeze(torch.nonzero(mask))]
        self.identity = identity.data[mask]
        self.bbox = bbox.data[mask]
        self.landmarks_align = landmarks_align.data[mask]
        self.attr = attr.data[mask]
        # map from {-1, 1} to {0, 1}
        self.attr = torch.div(self.attr + 1, 2, rounding_mode="floor")
        self.attr_names = attr.header

    def _load_csv(
        self,
        filename: str,
        header: Optional[int] = None,
    ) -> CSV:
        """Parse one space-separated annotation file into a ``CSV`` tuple.

        ``header`` is the row index of the column-name row; rows above and
        including it are stripped from the data. The first column of every data
        row is treated as the index (image file name), the rest as integers.
        """
        with open(os.path.join(self.root, self.base_folder, filename)) as csv_file:
            data = list(csv.reader(csv_file, delimiter=" ", skipinitialspace=True))

        if header is not None:
            headers = data[header]
            data = data[header + 1 :]
        else:
            headers = []

        indices = [row[0] for row in data]
        data = [row[1:] for row in data]
        data_int = [list(map(int, i)) for i in data]

        return CSV(headers, indices, torch.tensor(data_int))

    def _check_integrity(self) -> bool:
        for (_, md5, filename) in self.file_list:
            fpath = os.path.join(self.root, self.base_folder, filename)
            _, ext = os.path.splitext(filename)
            # Allow original archive to be deleted (zip and 7z)
            # Only need the extracted images
            if ext not in [".zip", ".7z"] and not check_integrity(fpath, md5):
                return False

        # Should check a hash of the images
        return os.path.isdir(os.path.join(self.root, self.base_folder, "img_align_celeba"))

    def download(self) -> None:
        """Fetch all annotation files and the aligned-image archive, then extract it."""
        if self._check_integrity():
            print("Files already downloaded and verified")
            return

        for (file_id, md5, filename) in self.file_list:
            download_file_from_google_drive(file_id, os.path.join(self.root, self.base_folder), filename, md5)

        extract_archive(os.path.join(self.root, self.base_folder, "img_align_celeba.zip"))

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        X = PIL.Image.open(os.path.join(self.root, self.base_folder, "img_align_celeba", self.filename[index]))

        # Collect one entry per requested target type, in the order given.
        target: Any = []
        for t in self.target_type:
            if t == "attr":
                target.append(self.attr[index, :])
            elif t == "identity":
                target.append(self.identity[index, 0])
            elif t == "bbox":
                target.append(self.bbox[index, :])
            elif t == "landmarks":
                target.append(self.landmarks_align[index, :])
            else:
                # TODO: refactor with utils.verify_str_arg
                raise ValueError(f'Target type "{t}" is not recognized.')

        if self.transform is not None:
            X = self.transform(X)

        if target:
            # Single target type unwraps to a bare value; multiple become a tuple.
            target = tuple(target) if len(target) > 1 else target[0]

            if self.target_transform is not None:
                target = self.target_transform(target)
        else:
            target = None

        return X, target

    def __len__(self) -> int:
        return len(self.attr)

    def extra_repr(self) -> str:
        lines = ["Target type: {target_type}", "Split: {split}"]
        return "\n".join(lines).format(**self.__dict__)
.venv/lib/python3.11/site-packages/torchvision/datasets/cifar.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import pickle
3
+ from pathlib import Path
4
+ from typing import Any, Callable, Optional, Tuple, Union
5
+
6
+ import numpy as np
7
+ from PIL import Image
8
+
9
+ from .utils import check_integrity, download_and_extract_archive
10
+ from .vision import VisionDataset
11
+
12
+
13
class CIFAR10(VisionDataset):
    """`CIFAR10 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.

    Args:
        root (str or ``pathlib.Path``): Root directory of dataset where directory
            ``cifar-10-batches-py`` exists or will be saved to if download is set to True.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from test set.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.

    """

    base_folder = "cifar-10-batches-py"
    url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    filename = "cifar-10-python.tar.gz"
    tgz_md5 = "c58f30108f718f92721af3b95e74349a"
    # Each entry pairs a batch file name with the md5 checksum of its contents.
    train_list = [
        ["data_batch_1", "c99cafc152244af753f735de768cd75f"],
        ["data_batch_2", "d4bba439e000b95fd0a9bffe97cbabec"],
        ["data_batch_3", "54ebc095f3ab1f0389bbae665268c751"],
        ["data_batch_4", "634d18415352ddfa80567beed471001a"],
        ["data_batch_5", "482c414d41f54cd18b22e5b47cb7c3cb"],
    ]

    test_list = [
        ["test_batch", "40351d587109b95175f43aff81a1287e"],
    ]
    meta = {
        "filename": "batches.meta",
        "key": "label_names",
        "md5": "5ff9c542aee3614f3951f8cda6e48888",
    }

    def __init__(
        self,
        root: Union[str, Path],
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super().__init__(root, transform=transform, target_transform=target_transform)

        self.train = train  # training set or test set

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError("Dataset not found or corrupted. You can use download=True to download it")

        batch_files = self.train_list if self.train else self.test_list

        self.data: Any = []
        self.targets = []

        # Each batch is a latin1-pickled dict holding raw image bytes and labels.
        for batch_name, _checksum in batch_files:
            batch_path = os.path.join(self.root, self.base_folder, batch_name)
            with open(batch_path, "rb") as fh:
                entry = pickle.load(fh, encoding="latin1")
            self.data.append(entry["data"])
            # CIFAR-10 batches store "labels"; CIFAR-100 batches store "fine_labels".
            label_key = "labels" if "labels" in entry else "fine_labels"
            self.targets.extend(entry[label_key])

        # Stack into one (N, 32, 32, 3) array, converting from CHW to HWC layout.
        self.data = np.vstack(self.data).reshape(-1, 3, 32, 32).transpose((0, 2, 3, 1))

        self._load_meta()

    def _load_meta(self) -> None:
        """Load class names from the meta file and build the name -> index map."""
        path = os.path.join(self.root, self.base_folder, self.meta["filename"])
        if not check_integrity(path, self.meta["md5"]):
            raise RuntimeError("Dataset metadata file not found or corrupted. You can use download=True to download it")
        with open(path, "rb") as fh:
            meta_data = pickle.load(fh, encoding="latin1")
        self.classes = meta_data[self.meta["key"]]
        self.class_to_idx = {name: idx for idx, name in enumerate(self.classes)}

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        # Return a PIL Image for consistency with all the other datasets.
        img = Image.fromarray(self.data[index])
        target = self.targets[index]

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self) -> int:
        return len(self.data)

    def _check_integrity(self) -> bool:
        """True iff every train and test batch is present with a matching md5."""
        return all(
            check_integrity(os.path.join(self.root, self.base_folder, name), md5)
            for name, md5 in self.train_list + self.test_list
        )

    def download(self) -> None:
        """Download and extract the archive into ``self.root`` (no-op when verified)."""
        if self._check_integrity():
            print("Files already downloaded and verified")
            return
        download_and_extract_archive(self.url, self.root, filename=self.filename, md5=self.tgz_md5)

    def extra_repr(self) -> str:
        return f"Split: {'Train' if self.train is True else 'Test'}"
145
+
146
+
147
class CIFAR100(CIFAR10):
    """`CIFAR100 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.

    This is a subclass of the `CIFAR10` Dataset.
    """

    # Only archive/file metadata differs from CIFAR10; all loading logic is inherited.
    base_folder = "cifar-100-python"
    url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
    filename = "cifar-100-python.tar.gz"
    tgz_md5 = "eb9058c3a382ffc7106e4002c42a8d85"
    # (file name, md5 checksum) pairs, same shape as CIFAR10.train_list / test_list.
    train_list = [
        ["train", "16019d7e3df5f24257cddd939b257f8d"],
    ]

    test_list = [
        ["test", "f0ef6b0ae62326f3e7ffdfab6717acfc"],
    ]
    # "fine_label_names" selects the 100 fine-grained class names from the meta file.
    meta = {
        "filename": "meta",
        "key": "fine_label_names",
        "md5": "7973b15100ade9c7d40fb424638fde48",
    }
.venv/lib/python3.11/site-packages/torchvision/datasets/cityscapes.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from collections import namedtuple
4
+ from pathlib import Path
5
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
6
+
7
+ from PIL import Image
8
+
9
+ from .utils import extract_archive, iterable_to_str, verify_str_arg
10
+ from .vision import VisionDataset
11
+
12
+
13
class Cityscapes(VisionDataset):
    """`Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset.

    Args:
        root (str or ``pathlib.Path``): Root directory of dataset where directory ``leftImg8bit``
            and ``gtFine`` or ``gtCoarse`` are located.
        split (string, optional): The image split to use, ``train``, ``test`` or ``val`` if mode="fine"
            otherwise ``train``, ``train_extra`` or ``val``
        mode (string, optional): The quality mode to use, ``fine`` or ``coarse``
        target_type (string or list, optional): Type of target to use, ``instance``, ``semantic``, ``polygon``
            or ``color``. Can also be a list to output a tuple with all specified target types.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        transforms (callable, optional): A function/transform that takes input sample and its target as entry
            and returns a transformed version.

    Examples:

        Get semantic segmentation target

        .. code-block:: python

            dataset = Cityscapes('./data/cityscapes', split='train', mode='fine',
                                 target_type='semantic')

            img, smnt = dataset[0]

        Get multiple targets

        .. code-block:: python

            dataset = Cityscapes('./data/cityscapes', split='train', mode='fine',
                                 target_type=['instance', 'color', 'polygon'])

            img, (inst, col, poly) = dataset[0]

        Validate on the "coarse" set

        .. code-block:: python

            dataset = Cityscapes('./data/cityscapes', split='val', mode='coarse',
                                 target_type='semantic')

            img, smnt = dataset[0]
    """

    # Based on https://github.com/mcordts/cityscapesScripts
    CityscapesClass = namedtuple(
        "CityscapesClass",
        ["name", "id", "train_id", "category", "category_id", "has_instances", "ignore_in_eval", "color"],
    )

    classes = [
        CityscapesClass("unlabeled", 0, 255, "void", 0, False, True, (0, 0, 0)),
        CityscapesClass("ego vehicle", 1, 255, "void", 0, False, True, (0, 0, 0)),
        CityscapesClass("rectification border", 2, 255, "void", 0, False, True, (0, 0, 0)),
        CityscapesClass("out of roi", 3, 255, "void", 0, False, True, (0, 0, 0)),
        CityscapesClass("static", 4, 255, "void", 0, False, True, (0, 0, 0)),
        CityscapesClass("dynamic", 5, 255, "void", 0, False, True, (111, 74, 0)),
        CityscapesClass("ground", 6, 255, "void", 0, False, True, (81, 0, 81)),
        CityscapesClass("road", 7, 0, "flat", 1, False, False, (128, 64, 128)),
        CityscapesClass("sidewalk", 8, 1, "flat", 1, False, False, (244, 35, 232)),
        CityscapesClass("parking", 9, 255, "flat", 1, False, True, (250, 170, 160)),
        CityscapesClass("rail track", 10, 255, "flat", 1, False, True, (230, 150, 140)),
        CityscapesClass("building", 11, 2, "construction", 2, False, False, (70, 70, 70)),
        CityscapesClass("wall", 12, 3, "construction", 2, False, False, (102, 102, 156)),
        CityscapesClass("fence", 13, 4, "construction", 2, False, False, (190, 153, 153)),
        CityscapesClass("guard rail", 14, 255, "construction", 2, False, True, (180, 165, 180)),
        CityscapesClass("bridge", 15, 255, "construction", 2, False, True, (150, 100, 100)),
        CityscapesClass("tunnel", 16, 255, "construction", 2, False, True, (150, 120, 90)),
        CityscapesClass("pole", 17, 5, "object", 3, False, False, (153, 153, 153)),
        CityscapesClass("polegroup", 18, 255, "object", 3, False, True, (153, 153, 153)),
        CityscapesClass("traffic light", 19, 6, "object", 3, False, False, (250, 170, 30)),
        CityscapesClass("traffic sign", 20, 7, "object", 3, False, False, (220, 220, 0)),
        CityscapesClass("vegetation", 21, 8, "nature", 4, False, False, (107, 142, 35)),
        CityscapesClass("terrain", 22, 9, "nature", 4, False, False, (152, 251, 152)),
        CityscapesClass("sky", 23, 10, "sky", 5, False, False, (70, 130, 180)),
        CityscapesClass("person", 24, 11, "human", 6, True, False, (220, 20, 60)),
        CityscapesClass("rider", 25, 12, "human", 6, True, False, (255, 0, 0)),
        CityscapesClass("car", 26, 13, "vehicle", 7, True, False, (0, 0, 142)),
        CityscapesClass("truck", 27, 14, "vehicle", 7, True, False, (0, 0, 70)),
        CityscapesClass("bus", 28, 15, "vehicle", 7, True, False, (0, 60, 100)),
        CityscapesClass("caravan", 29, 255, "vehicle", 7, True, True, (0, 0, 90)),
        CityscapesClass("trailer", 30, 255, "vehicle", 7, True, True, (0, 0, 110)),
        CityscapesClass("train", 31, 16, "vehicle", 7, True, False, (0, 80, 100)),
        CityscapesClass("motorcycle", 32, 17, "vehicle", 7, True, False, (0, 0, 230)),
        CityscapesClass("bicycle", 33, 18, "vehicle", 7, True, False, (119, 11, 32)),
        CityscapesClass("license plate", -1, -1, "vehicle", 7, False, True, (0, 0, 142)),
    ]

    def __init__(
        self,
        root: Union[str, Path],
        split: str = "train",
        mode: str = "fine",
        target_type: Union[List[str], str] = "instance",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        transforms: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transforms, transform, target_transform)
        # Public mode string maps to the on-disk annotation folder name.
        self.mode = "gtFine" if mode == "fine" else "gtCoarse"
        self.images_dir = os.path.join(self.root, "leftImg8bit", split)
        self.targets_dir = os.path.join(self.root, self.mode, split)
        self.target_type = target_type
        self.split = split
        self.images = []
        self.targets = []

        # Valid splits depend on the mode: "test" only exists for fine annotations,
        # "train_extra" only for coarse.
        verify_str_arg(mode, "mode", ("fine", "coarse"))
        if mode == "fine":
            valid_modes = ("train", "test", "val")
        else:
            valid_modes = ("train", "train_extra", "val")
        msg = "Unknown value '{}' for argument split if mode is '{}'. Valid values are {{{}}}."
        msg = msg.format(split, mode, iterable_to_str(valid_modes))
        verify_str_arg(split, "split", valid_modes, msg)

        # Normalize target_type to a list, then validate every entry.
        if not isinstance(target_type, list):
            self.target_type = [target_type]
        [
            verify_str_arg(value, "target_type", ("instance", "semantic", "polygon", "color"))
            for value in self.target_type
        ]

        # If the extracted folders are missing, try to extract the official zip
        # archives from root before giving up.
        if not os.path.isdir(self.images_dir) or not os.path.isdir(self.targets_dir):

            if split == "train_extra":
                image_dir_zip = os.path.join(self.root, "leftImg8bit_trainextra.zip")
            else:
                image_dir_zip = os.path.join(self.root, "leftImg8bit_trainvaltest.zip")

            if self.mode == "gtFine":
                target_dir_zip = os.path.join(self.root, f"{self.mode}_trainvaltest.zip")
            elif self.mode == "gtCoarse":
                target_dir_zip = os.path.join(self.root, f"{self.mode}.zip")

            if os.path.isfile(image_dir_zip) and os.path.isfile(target_dir_zip):
                extract_archive(from_path=image_dir_zip, to_path=self.root)
                extract_archive(from_path=target_dir_zip, to_path=self.root)
            else:
                raise RuntimeError(
                    "Dataset not found or incomplete. Please make sure all required folders for the"
                    ' specified "split" and "mode" are inside the "root" directory'
                )

        # Index every image and, per image, one target path per requested target type.
        for city in os.listdir(self.images_dir):
            img_dir = os.path.join(self.images_dir, city)
            target_dir = os.path.join(self.targets_dir, city)
            for file_name in os.listdir(img_dir):
                target_types = []
                for t in self.target_type:
                    # Image "<prefix>_leftImg8bit.png" pairs with "<prefix>_<suffix>".
                    target_name = "{}_{}".format(
                        file_name.split("_leftImg8bit")[0], self._get_target_suffix(self.mode, t)
                    )
                    target_types.append(os.path.join(target_dir, target_name))

                self.images.append(os.path.join(img_dir, file_name))
                self.targets.append(target_types)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is a tuple of all target types if target_type is a list with more
            than one item. Otherwise, target is a json object if target_type="polygon", else the image segmentation.
        """

        image = Image.open(self.images[index]).convert("RGB")

        targets: Any = []
        for i, t in enumerate(self.target_type):
            if t == "polygon":
                target = self._load_json(self.targets[index][i])
            else:
                target = Image.open(self.targets[index][i])  # type: ignore[assignment]

            targets.append(target)

        target = tuple(targets) if len(targets) > 1 else targets[0]

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self) -> int:
        return len(self.images)

    def extra_repr(self) -> str:
        lines = ["Split: {split}", "Mode: {mode}", "Type: {target_type}"]
        return "\n".join(lines).format(**self.__dict__)

    def _load_json(self, path: str) -> Dict[str, Any]:
        """Load one polygon annotation file as a plain dict."""
        with open(path) as file:
            data = json.load(file)
        return data

    def _get_target_suffix(self, mode: str, target_type: str) -> str:
        """Map a target type to its annotation-file suffix for the given mode folder."""
        if target_type == "instance":
            return f"{mode}_instanceIds.png"
        elif target_type == "semantic":
            return f"{mode}_labelIds.png"
        elif target_type == "color":
            return f"{mode}_color.png"
        else:
            return f"{mode}_polygons.json"
.venv/lib/python3.11/site-packages/torchvision/datasets/clevr.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import pathlib
3
+ from typing import Any, Callable, List, Optional, Tuple, Union
4
+ from urllib.parse import urlparse
5
+
6
+ from PIL import Image
7
+
8
+ from .utils import download_and_extract_archive, verify_str_arg
9
+ from .vision import VisionDataset
10
+
11
+
12
class CLEVRClassification(VisionDataset):
    """`CLEVR <https://cs.stanford.edu/people/jcjohns/clevr/>`_ classification dataset.

    The number of objects in a scene are used as label.

    Args:
        root (str or ``pathlib.Path``): Root directory of dataset where directory ``root/clevr`` exists or will be saved to if download is
            set to True.
        split (string, optional): The dataset split, supports ``"train"`` (default), ``"val"``, or ``"test"``.
        transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed
            version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in them target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and puts it in root directory. If
            dataset is already downloaded, it is not downloaded again.
    """

    _URL = "https://dl.fbaipublicfiles.com/clevr/CLEVR_v1.0.zip"
    _MD5 = "b11922020e72d0cd9154779b2d3d07d2"

    def __init__(
        self,
        root: Union[str, pathlib.Path],
        split: str = "train",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        self._split = verify_str_arg(split, "split", ("train", "val", "test"))
        super().__init__(root, transform=transform, target_transform=target_transform)
        self._base_folder = pathlib.Path(self.root) / "clevr"
        # e.g. ".../clevr/CLEVR_v1.0" — derived from the archive name in the URL.
        self._data_folder = self._base_folder / pathlib.Path(urlparse(self._URL).path).stem

        if download:
            self._download()

        if not self._check_exists():
            raise RuntimeError("Dataset not found or corrupted. You can use download=True to download it")

        self._image_files = sorted(self._data_folder.joinpath("images", self._split).glob("*"))

        self._labels: List[Optional[int]]
        if self._split == "test":
            # The test split ships without scene annotations, so labels are unknown.
            self._labels = [None] * len(self._image_files)
        else:
            scenes_path = self._data_folder / "scenes" / f"CLEVR_{self._split}_scenes.json"
            with open(scenes_path) as fh:
                scenes = json.load(fh)["scenes"]
            objects_per_image = {scene["image_filename"]: len(scene["objects"]) for scene in scenes}
            self._labels = [objects_per_image[path.name] for path in self._image_files]

    def __len__(self) -> int:
        return len(self._image_files)

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        image = Image.open(self._image_files[idx]).convert("RGB")
        label = self._labels[idx]

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)

        return image, label

    def _check_exists(self) -> bool:
        """True when the extracted CLEVR_v1.0 directory is present."""
        folder = self._data_folder
        return folder.exists() and folder.is_dir()

    def _download(self) -> None:
        """Download and extract the archive unless the data is already present."""
        if not self._check_exists():
            download_and_extract_archive(self._URL, str(self._base_folder), md5=self._MD5)

    def extra_repr(self) -> str:
        return f"split={self._split}"
.venv/lib/python3.11/site-packages/torchvision/datasets/coco.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ from pathlib import Path
3
+ from typing import Any, Callable, List, Optional, Tuple, Union
4
+
5
+ from PIL import Image
6
+
7
+ from .vision import VisionDataset
8
+
9
+
10
class CocoDetection(VisionDataset):
    """`MS Coco Detection <https://cocodataset.org/#detection-2016>`_ Dataset.

    It requires the `COCO API to be installed <https://github.com/pdollar/coco/tree/master/PythonAPI>`_.

    Args:
        root (str or ``pathlib.Path``): Root directory where images are downloaded to.
        annFile (string): Path to json annotation file.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.PILToTensor``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        transforms (callable, optional): A function/transform that takes input sample and its target as entry
            and returns a transformed version.
    """

    def __init__(
        self,
        root: Union[str, Path],
        annFile: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        transforms: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transforms, transform, target_transform)
        # Imported lazily so that merely importing torchvision does not require
        # pycocotools to be installed.
        from pycocotools.coco import COCO

        self.coco = COCO(annFile)
        # Sorted so that iteration order is deterministic across runs.
        self.ids = sorted(self.coco.imgs.keys())

    def _load_image(self, id: int) -> Image.Image:
        # COCO stores file names relative to ``root``.
        info = self.coco.loadImgs(id)[0]
        return Image.open(os.path.join(self.root, info["file_name"])).convert("RGB")

    def _load_target(self, id: int) -> List[Any]:
        # Every annotation dict attached to the given image id.
        ann_ids = self.coco.getAnnIds(id)
        return self.coco.loadAnns(ann_ids)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        if not isinstance(index, int):
            raise ValueError(f"Index must be of type integer, got {type(index)} instead.")

        image_id = self.ids[index]
        image = self._load_image(image_id)
        target = self._load_target(image_id)

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self) -> int:
        return len(self.ids)
63
+
64
+
65
class CocoCaptions(CocoDetection):
    """`MS Coco Captions <https://cocodataset.org/#captions-2015>`_ Dataset.

    It requires the `COCO API to be installed <https://github.com/pdollar/coco/tree/master/PythonAPI>`_.

    Args:
        root (str or ``pathlib.Path``): Root directory where images are downloaded to.
        annFile (string): Path to json annotation file.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.PILToTensor``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        transforms (callable, optional): A function/transform that takes input sample and its target as entry
            and returns a transformed version.

    Example:

    .. code:: python

        import torchvision.datasets as dset
        import torchvision.transforms as transforms
        cap = dset.CocoCaptions(root = 'dir where images are',
                                annFile = 'json annotation file',
                                transform=transforms.PILToTensor())

        print('Number of samples: ', len(cap))
        img, target = cap[3]  # load 4th sample

        print("Image Size: ", img.size())
        print(target)

    Output: ::

        Number of samples: 82783
        Image Size: (3L, 427L, 640L)
        [u'A plane emitting smoke stream flying over a mountain.',
        u'A plane darts across a bright blue sky behind a mountain covered in snow',
        u'A plane leaves a contrail above the snowy mountain top.',
        u'A mountain that has a plane flying overheard in the distance.',
        u'A mountain view with a plume of smoke in the background']

    """

    def _load_target(self, id: int) -> List[str]:
        # Unlike detection, the target is just the list of caption strings
        # extracted from the raw annotation dicts.
        captions = []
        for annotation in super()._load_target(id):
            captions.append(annotation["caption"])
        return captions
.venv/lib/python3.11/site-packages/torchvision/datasets/dtd.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pathlib
3
+ from typing import Any, Callable, Optional, Tuple, Union
4
+
5
+ import PIL.Image
6
+
7
+ from .utils import download_and_extract_archive, verify_str_arg
8
+ from .vision import VisionDataset
9
+
10
+
11
class DTD(VisionDataset):
    """`Describable Textures Dataset (DTD) <https://www.robots.ox.ac.uk/~vgg/data/dtd/>`_.

    Args:
        root (str or ``pathlib.Path``): Root directory of the dataset.
        split (string, optional): The dataset split, supports ``"train"`` (default), ``"val"``, or ``"test"``.
        partition (int, optional): The dataset partition. Should be ``1 <= partition <= 10``. Defaults to ``1``.

            .. note::

                The partition only changes which split each image belongs to. Thus, regardless of the selected
                partition, combining all splits will result in all images.

        transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed
            version. E.g, ``transforms.RandomCrop``.
        target_transform (callable, optional): A function/transform that takes in the target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again. Default is False.

    Raises:
        ValueError: If ``partition`` is not an int in ``[1, 10]``.
        RuntimeError: If the dataset is not present and ``download`` is False.
    """

    _URL = "https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz"
    _MD5 = "fff73e5086ae6bdbea199a49dfb8a4c1"

    def __init__(
        self,
        root: Union[str, pathlib.Path],
        split: str = "train",
        partition: int = 1,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        self._split = verify_str_arg(split, "split", ("train", "val", "test"))
        # BUG FIX: this condition previously joined the two checks with ``and``,
        # which let an out-of-range int (e.g. 11) and an in-range non-int
        # (e.g. 2.5) slip through validation. The argument is invalid when it is
        # not an int OR when it lies outside [1, 10]; ``or`` short-circuits, so
        # the range check never runs on a non-int.
        if not isinstance(partition, int) or not (1 <= partition <= 10):
            raise ValueError(
                f"Parameter 'partition' should be an integer with `1 <= partition <= 10`, "
                f"but got {partition} instead"
            )
        self._partition = partition

        super().__init__(root, transform=transform, target_transform=target_transform)
        # Layout on disk: <root>/dtd/dtd/{labels,images}/...
        self._base_folder = pathlib.Path(self.root) / type(self).__name__.lower()
        self._data_folder = self._base_folder / "dtd"
        self._meta_folder = self._data_folder / "labels"
        self._images_folder = self._data_folder / "images"

        if download:
            self._download()

        if not self._check_exists():
            raise RuntimeError("Dataset not found. You can use download=True to download it")

        # Each line of the split file is "<class>/<filename>".
        self._image_files = []
        classes = []
        with open(self._meta_folder / f"{self._split}{self._partition}.txt") as file:
            for line in file:
                cls, name = line.strip().split("/")
                self._image_files.append(self._images_folder.joinpath(cls, name))
                classes.append(cls)

        self.classes = sorted(set(classes))
        self.class_to_idx = dict(zip(self.classes, range(len(self.classes))))
        self._labels = [self.class_to_idx[cls] for cls in classes]

    def __len__(self) -> int:
        return len(self._image_files)

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        image_file, label = self._image_files[idx], self._labels[idx]
        image = PIL.Image.open(image_file).convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.target_transform:
            label = self.target_transform(label)

        return image, label

    def extra_repr(self) -> str:
        return f"split={self._split}, partition={self._partition}"

    def _check_exists(self) -> bool:
        # Presence of the extracted top-level folder is taken as "downloaded".
        return os.path.exists(self._data_folder) and os.path.isdir(self._data_folder)

    def _download(self) -> None:
        if self._check_exists():
            return
        download_and_extract_archive(self._URL, download_root=str(self._base_folder), md5=self._MD5)
.venv/lib/python3.11/site-packages/torchvision/datasets/eurosat.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Callable, Optional, Union
4
+
5
+ from .folder import ImageFolder
6
+ from .utils import download_and_extract_archive
7
+
8
+
9
class EuroSAT(ImageFolder):
    """RGB version of the `EuroSAT <https://github.com/phelber/eurosat>`_ Dataset.

    For the MS version of the dataset, see
    `TorchGeo <https://torchgeo.readthedocs.io/en/stable/api/datasets.html#eurosat>`__.

    Args:
        root (str or ``pathlib.Path``): Root directory of dataset where ``root/eurosat`` exists.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again. Default is False.
    """

    def __init__(
        self,
        root: Union[str, Path],
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        expanded_root = os.path.expanduser(root)
        # Paths must exist before ImageFolder.__init__ scans the directory,
        # hence the download/existence check happens first.
        self.root = expanded_root
        self._base_folder = os.path.join(expanded_root, "eurosat")
        self._data_folder = os.path.join(self._base_folder, "2750")

        if download:
            self.download()

        if not self._check_exists():
            raise RuntimeError("Dataset not found. You can use download=True to download it")

        super().__init__(self._data_folder, transform=transform, target_transform=target_transform)
        # ImageFolder.__init__ overwrites ``self.root`` with the data folder;
        # restore the user-supplied root so ``repr`` and callers see it.
        self.root = expanded_root

    def __len__(self) -> int:
        return len(self.samples)

    def _check_exists(self) -> bool:
        return os.path.exists(self._data_folder)

    def download(self) -> None:
        # No-op when the extracted data folder is already present.
        if self._check_exists():
            return

        os.makedirs(self._base_folder, exist_ok=True)
        download_and_extract_archive(
            "https://huggingface.co/datasets/torchgeo/eurosat/resolve/c877bcd43f099cd0196738f714544e355477f3fd/EuroSAT.zip",
            download_root=self._base_folder,
            md5="c8fa014336c82ac7804f0398fcb19387",
        )
.venv/lib/python3.11/site-packages/torchvision/datasets/fakedata.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Callable, Optional, Tuple
2
+
3
+ import torch
4
+
5
+ from .. import transforms
6
+ from .vision import VisionDataset
7
+
8
+
9
class FakeData(VisionDataset):
    """A fake dataset that returns randomly generated images and returns them as PIL images

    Args:
        size (int, optional): Size of the dataset. Default: 1000 images
        image_size(tuple, optional): Size if the returned images. Default: (3, 224, 224)
        num_classes(int, optional): Number of classes in the dataset. Default: 10
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        random_offset (int): Offsets the index-based random seed used to
            generate each image. Default: 0

    """

    def __init__(
        self,
        size: int = 1000,
        image_size: Tuple[int, int, int] = (3, 224, 224),
        num_classes: int = 10,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        random_offset: int = 0,
    ) -> None:
        super().__init__(transform=transform, target_transform=target_transform)
        self.size = size
        self.num_classes = num_classes
        self.image_size = image_size
        self.random_offset = random_offset

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is class_index of the target class.
        """
        if index >= len(self):
            raise IndexError(f"{self.__class__.__name__} index out of range")
        # Seed deterministically per index so the same index always yields the
        # same sample, then restore the global RNG state so callers' random
        # streams are unaffected.
        saved_rng_state = torch.get_rng_state()
        torch.manual_seed(index + self.random_offset)
        image_tensor = torch.randn(*self.image_size)
        label = torch.randint(0, self.num_classes, size=(1,), dtype=torch.long)[0]
        torch.set_rng_state(saved_rng_state)

        # Hand the caller a PIL image, as documented.
        image = transforms.ToPILImage()(image_tensor)
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            # NOTE: the transform receives the 0-dim tensor; its result must
            # still support ``.item()``.
            label = self.target_transform(label)

        return image, label.item()

    def __len__(self) -> int:
        return self.size
.venv/lib/python3.11/site-packages/torchvision/datasets/fer2013.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import pathlib
3
+ from typing import Any, Callable, Optional, Tuple, Union
4
+
5
+ import torch
6
+ from PIL import Image
7
+
8
+ from .utils import check_integrity, verify_str_arg
9
+ from .vision import VisionDataset
10
+
11
+
12
class FER2013(VisionDataset):
    """`FER2013
    <https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge>`_ Dataset.

    .. note::
        This dataset can return test labels only if ``fer2013.csv`` OR
        ``icml_face_data.csv`` are present in ``root/fer2013/``. If only
        ``train.csv`` and ``test.csv`` are present, the test labels are set to
        ``None``.

    Args:
        root (str or ``pathlib.Path``): Root directory of dataset where directory
            ``root/fer2013`` exists. This directory may contain either
            ``fer2013.csv``, ``icml_face_data.csv``, or both ``train.csv`` and
            ``test.csv``. Precendence is given in that order, i.e. if
            ``fer2013.csv`` is present then the rest of the files will be
            ignored. All these (combinations of) files contain the same data and
            are supported for convenience, but only ``fer2013.csv`` and
            ``icml_face_data.csv`` are able to return non-None test labels.
        split (string, optional): The dataset split, supports ``"train"`` (default), or ``"test"``.
        transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed
            version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the target and transforms it.
    """

    # Maps a source-file key to (filename, expected md5). "fer"/"icml" take
    # precedence over the per-split files (see the comment below).
    _RESOURCES = {
        "train": ("train.csv", "3f0dfb3d3fd99c811a1299cb947e3131"),
        "test": ("test.csv", "b02c2298636a634e8c2faabbf3ea9a23"),
        # The fer2013.csv and icml_face_data.csv files contain both train and
        # tests instances, and unlike test.csv they contain the labels for the
        # test instances. We give these 2 files precedence over train.csv and
        # test.csv. And yes, they both contain the same data, but with different
        # column names (note the spaces) and ordering:
        # $ head -n 1 fer2013.csv icml_face_data.csv train.csv test.csv
        # ==> fer2013.csv <==
        # emotion,pixels,Usage
        #
        # ==> icml_face_data.csv <==
        # emotion, Usage, pixels
        #
        # ==> train.csv <==
        # emotion,pixels
        #
        # ==> test.csv <==
        # pixels
        "fer": ("fer2013.csv", "f8428a1edbd21e88f42c73edd2a14f95"),
        "icml": ("icml_face_data.csv", "b114b9e04e6949e5fe8b6a98b3892b1d"),
    }

    def __init__(
        self,
        root: Union[str, pathlib.Path],
        split: str = "train",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ) -> None:
        # Validate the split before any filesystem work.
        self._split = verify_str_arg(split, "split", ("train", "test"))
        super().__init__(root, transform=transform, target_transform=target_transform)

        base_folder = pathlib.Path(self.root) / "fer2013"
        # File precedence: fer2013.csv > icml_face_data.csv > {train,test}.csv.
        use_fer_file = (base_folder / self._RESOURCES["fer"][0]).exists()
        use_icml_file = not use_fer_file and (base_folder / self._RESOURCES["icml"][0]).exists()
        file_name, md5 = self._RESOURCES["fer" if use_fer_file else "icml" if use_icml_file else self._split]
        data_file = base_folder / file_name
        if not check_integrity(str(data_file), md5=md5):
            raise RuntimeError(
                f"{file_name} not found in {base_folder} or corrupted. "
                f"You can download it from "
                f"https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge"
            )

        # The icml file's header has a leading space in these column names.
        pixels_key = " pixels" if use_icml_file else "pixels"
        usage_key = " Usage" if use_icml_file else "Usage"

        def get_img(row):
            # Pixels are stored as a space-separated string of 48*48 grayscale values.
            return torch.tensor([int(idx) for idx in row[pixels_key].split()], dtype=torch.uint8).reshape(48, 48)

        def get_label(row):
            # test.csv has no "emotion" column, so test labels are None unless
            # one of the combined files is used.
            if use_fer_file or use_icml_file or self._split == "train":
                return int(row["emotion"])
            else:
                return None

        with open(data_file, "r", newline="") as file:
            # NOTE: these generators read lazily, so the list comprehension
            # below must stay inside this ``with`` block while the file is open.
            rows = (row for row in csv.DictReader(file))

            if use_fer_file or use_icml_file:
                # The combined files mix train and test rows; filter by Usage.
                valid_keys = ("Training",) if self._split == "train" else ("PublicTest", "PrivateTest")
                rows = (row for row in rows if row[usage_key] in valid_keys)

            self._samples = [(get_img(row), get_label(row)) for row in rows]

    def __len__(self) -> int:
        return len(self._samples)

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        # Samples are pre-parsed tensors; convert back to PIL on access.
        image_tensor, target = self._samples[idx]
        image = Image.fromarray(image_tensor.numpy())

        if self.transform is not None:
            image = self.transform(image)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return image, target

    def extra_repr(self) -> str:
        return f"split={self._split}"
.venv/lib/python3.11/site-packages/torchvision/datasets/fgvc_aircraft.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Any, Callable, Optional, Tuple, Union
6
+
7
+ import PIL.Image
8
+
9
+ from .utils import download_and_extract_archive, verify_str_arg
10
+ from .vision import VisionDataset
11
+
12
+
13
class FGVCAircraft(VisionDataset):
    """`FGVC Aircraft <https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/>`_ Dataset.

    The dataset contains 10,000 images of aircraft, with 100 images for each of 100
    different aircraft model variants, most of which are airplanes.
    Aircraft models are organized in a three-levels hierarchy. The three levels, from
    finer to coarser, are:

    - ``variant``, e.g. Boeing 737-700. A variant collapses all the models that are visually
      indistinguishable into one class. The dataset comprises 100 different variants.
    - ``family``, e.g. Boeing 737. The dataset comprises 70 different families.
    - ``manufacturer``, e.g. Boeing. The dataset comprises 30 different manufacturers.

    Args:
        root (str or ``pathlib.Path``): Root directory of the FGVC Aircraft dataset.
        split (string, optional): The dataset split, supports ``train``, ``val``,
            ``trainval`` and ``test``.
        annotation_level (str, optional): The annotation level, supports ``variant``,
            ``family`` and ``manufacturer``.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    _URL = "https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/archives/fgvc-aircraft-2013b.tar.gz"

    def __init__(
        self,
        root: Union[str, Path],
        split: str = "trainval",
        annotation_level: str = "variant",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super().__init__(root, transform=transform, target_transform=target_transform)
        self._split = verify_str_arg(split, "split", ("train", "val", "trainval", "test"))
        self._annotation_level = verify_str_arg(
            annotation_level, "annotation_level", ("variant", "family", "manufacturer")
        )

        self._data_path = os.path.join(self.root, "fgvc-aircraft-2013b")
        if download:
            self._download()

        if not self._check_exists():
            raise RuntimeError("Dataset not found. You can use download=True to download it")

        # Each annotation level has its own list of class names on disk.
        level_to_file = {
            "variant": "variants.txt",
            "family": "families.txt",
            "manufacturer": "manufacturers.txt",
        }
        annotation_file = os.path.join(self._data_path, "data", level_to_file[self._annotation_level])
        with open(annotation_file, "r") as f:
            self.classes = [line.strip() for line in f]

        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}

        image_data_folder = os.path.join(self._data_path, "data", "images")
        labels_file = os.path.join(self._data_path, "data", f"images_{self._annotation_level}_{self._split}.txt")

        # Each line is "<image id> <class name>"; class names may contain spaces,
        # hence the single split.
        self._image_files = []
        self._labels = []
        with open(labels_file, "r") as f:
            for line in f:
                image_name, label_name = line.strip().split(" ", 1)
                self._image_files.append(os.path.join(image_data_folder, f"{image_name}.jpg"))
                self._labels.append(self.class_to_idx[label_name])

    def __len__(self) -> int:
        return len(self._image_files)

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        image_file, label = self._image_files[idx], self._labels[idx]
        image = PIL.Image.open(image_file).convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.target_transform:
            label = self.target_transform(label)

        return image, label

    def _download(self) -> None:
        """
        Download the FGVC Aircraft dataset archive and extract it under root.
        """
        if self._check_exists():
            return
        download_and_extract_archive(self._URL, self.root)

    def _check_exists(self) -> bool:
        return os.path.exists(self._data_path) and os.path.isdir(self._data_path)
.venv/lib/python3.11/site-packages/torchvision/datasets/flickr.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import glob
2
+ import os
3
+ from collections import defaultdict
4
+ from html.parser import HTMLParser
5
+ from pathlib import Path
6
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
7
+
8
+ from PIL import Image
9
+
10
+ from .vision import VisionDataset
11
+
12
+
13
class Flickr8kParser(HTMLParser):
    """Parser for extracting captions from the Flickr8k dataset web page."""

    def __init__(self, root: Union[str, Path]) -> None:
        super().__init__()

        self.root = root

        # image path -> list of caption strings
        self.annotations: Dict[str, List[str]] = {}

        # Parsing state: are we inside the captions <table>, which tag is
        # currently open, and which image do captions currently belong to.
        self.in_table = False
        self.current_tag: Optional[str] = None
        self.current_img: Optional[str] = None

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
        self.current_tag = tag
        if tag == "table":
            self.in_table = True

    def handle_endtag(self, tag: str) -> None:
        self.current_tag = None
        if tag == "table":
            self.in_table = False

    def handle_data(self, data: str) -> None:
        if not self.in_table:
            return
        if data == "Image Not Found":
            # Drop captions until the next valid image link.
            self.current_img = None
        elif self.current_tag == "a":
            # Anchor text holds the image id; resolve it to a file under root.
            pattern = os.path.join(self.root, data.split("/")[-2] + "_*.jpg")
            matched = glob.glob(pattern)[0]
            self.current_img = matched
            self.annotations[matched] = []
        elif self.current_tag == "li" and self.current_img:
            # List items inside the table are the captions themselves.
            self.annotations[self.current_img].append(data.strip())
+
55
+
56
class Flickr8k(VisionDataset):
    """`Flickr8k Entities <http://hockenmaier.cs.illinois.edu/8k-pictures.html>`_ Dataset.

    Args:
        root (str or ``pathlib.Path``): Root directory where images are downloaded to.
        ann_file (string): Path to annotation file.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.PILToTensor``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
    """

    def __init__(
        self,
        root: Union[str, Path],
        ann_file: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transform=transform, target_transform=target_transform)
        self.ann_file = os.path.expanduser(ann_file)

        # Parse the HTML annotation page once up front and keep the
        # image-path -> captions mapping.
        parser = Flickr8kParser(self.root)
        with open(self.ann_file) as fh:
            parser.feed(fh.read())
        self.annotations = parser.annotations

        # Deterministic sample ordering.
        self.ids = sorted(self.annotations.keys())

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: Tuple (image, target). target is a list of captions for the image.
        """
        img_id = self.ids[index]

        # The ids are full image paths.
        image = Image.open(img_id).convert("RGB")
        if self.transform is not None:
            image = self.transform(image)

        captions = self.annotations[img_id]
        if self.target_transform is not None:
            captions = self.target_transform(captions)

        return image, captions

    def __len__(self) -> int:
        return len(self.ids)
+ return len(self.ids)
110
+
111
+
112
class Flickr30k(VisionDataset):
    """`Flickr30k Entities <https://bryanplummer.com/Flickr30kEntities/>`_ Dataset.

    Args:
        root (str or ``pathlib.Path``): Root directory where images are downloaded to.
        ann_file (string): Path to annotation file.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.PILToTensor``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
    """

    def __init__(
        self,
        # Consistency fix: annotated ``str`` only, although ``root`` is
        # documented (and used) as str-or-path like in the sibling ``Flickr8k``.
        root: Union[str, Path],
        ann_file: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transform=transform, target_transform=target_transform)
        self.ann_file = os.path.expanduser(ann_file)

        # Each line is "<image>#<caption index>\t<caption>"; strip the trailing
        # "#<n>" (always 2 chars) to group all captions per image.
        self.annotations = defaultdict(list)
        with open(self.ann_file) as fh:
            for line in fh:
                img_id, caption = line.strip().split("\t")
                self.annotations[img_id[:-2]].append(caption)

        # Deterministic sample ordering.
        self.ids = list(sorted(self.annotations.keys()))

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: Tuple (image, target). target is a list of captions for the image.
        """
        img_id = self.ids[index]

        # Image
        filename = os.path.join(self.root, img_id)
        img = Image.open(filename).convert("RGB")
        if self.transform is not None:
            img = self.transform(img)

        # Captions
        target = self.annotations[img_id]
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self) -> int:
        return len(self.ids)
+ return len(self.ids)
.venv/lib/python3.11/site-packages/torchvision/datasets/flowers102.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from typing import Any, Callable, Optional, Tuple, Union
3
+
4
+ import PIL.Image
5
+
6
+ from .utils import check_integrity, download_and_extract_archive, download_url, verify_str_arg
7
+ from .vision import VisionDataset
8
+
9
+
10
class Flowers102(VisionDataset):
    """`Oxford 102 Flower <https://www.robots.ox.ac.uk/~vgg/data/flowers/102/>`_ Dataset.

    .. warning::

        This class needs `scipy <https://docs.scipy.org/doc/>`_ to load target files from `.mat` format.

    Oxford 102 Flower is an image classification dataset consisting of 102 flower categories. The
    flowers were chosen to be flowers commonly occurring in the United Kingdom. Each class consists of
    between 40 and 258 images.

    The images have large scale, pose and light variations. In addition, there are categories that
    have large variations within the category, and several very similar categories.

    Args:
        root (str or ``pathlib.Path``): Root directory of the dataset.
        split (string, optional): The dataset split, supports ``"train"`` (default), ``"val"``, or ``"test"``.
        transform (callable, optional): A function/transform that takes in a PIL image and returns a
            transformed version. E.g, ``transforms.RandomCrop``.
        target_transform (callable, optional): A function/transform that takes in the target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    _download_url_prefix = "https://www.robots.ox.ac.uk/~vgg/data/flowers/102/"
    _file_dict = {  # filename, md5
        "image": ("102flowers.tgz", "52808999861908f626f3c1f4e79d11fa"),
        "label": ("imagelabels.mat", "e0620be6f572b9609742df49c70aed4d"),
        "setid": ("setid.mat", "a5357ecc9cb78c4bef273ce3793fc85c"),
    }
    # Split name -> variable name inside setid.mat.
    _splits_map = {"train": "trnid", "val": "valid", "test": "tstid"}

    def __init__(
        self,
        root: Union[str, Path],
        split: str = "train",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super().__init__(root, transform=transform, target_transform=target_transform)
        self._split = verify_str_arg(split, "split", ("train", "val", "test"))
        self._base_folder = Path(self.root) / "flowers-102"
        self._images_folder = self._base_folder / "jpg"

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError("Dataset not found or corrupted. You can use download=True to download it")

        # Deferred import: scipy is only required when actually using the class.
        from scipy.io import loadmat

        # setid.mat lists which 1-based image ids belong to each split.
        set_ids = loadmat(self._base_folder / self._file_dict["setid"][0], squeeze_me=True)
        image_ids = set_ids[self._splits_map[self._split]].tolist()

        # imagelabels.mat holds one 1-based label per image; shift to 0-based.
        labels = loadmat(self._base_folder / self._file_dict["label"][0], squeeze_me=True)
        image_id_to_label = dict(enumerate((labels["labels"] - 1).tolist(), 1))

        self._labels = [image_id_to_label[image_id] for image_id in image_ids]
        self._image_files = [self._images_folder / f"image_{image_id:05d}.jpg" for image_id in image_ids]

    def __len__(self) -> int:
        return len(self._image_files)

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        image_file, label = self._image_files[idx], self._labels[idx]
        image = PIL.Image.open(image_file).convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.target_transform:
            label = self.target_transform(label)

        return image, label

    def extra_repr(self) -> str:
        return f"split={self._split}"

    def _check_integrity(self):
        # Images folder must exist, and both .mat files must match their md5.
        if not self._images_folder.is_dir():
            return False

        for key in ("label", "setid"):
            filename, md5 = self._file_dict[key]
            if not check_integrity(str(self._base_folder / filename), md5):
                return False
        return True

    def download(self):
        if self._check_integrity():
            return
        download_and_extract_archive(
            f"{self._download_url_prefix}{self._file_dict['image'][0]}",
            str(self._base_folder),
            md5=self._file_dict["image"][1],
        )
        for key in ("label", "setid"):
            filename, md5 = self._file_dict[key]
            download_url(self._download_url_prefix + filename, str(self._base_folder), md5=md5)
.venv/lib/python3.11/site-packages/torchvision/datasets/food101.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Any, Callable, Optional, Tuple, Union
4
+
5
+ import PIL.Image
6
+
7
+ from .utils import download_and_extract_archive, verify_str_arg
8
+ from .vision import VisionDataset
9
+
10
+
11
class Food101(VisionDataset):
    """`The Food-101 Data Set <https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/>`_.

    Food-101 covers 101 food categories with 101,000 images in total: 750
    training and 250 manually reviewed test images per class. The training
    images were deliberately left uncleaned and therefore contain some noise,
    mostly in the form of intense colors and occasionally wrong labels. All
    images were rescaled to a maximum side length of 512 pixels.

    Args:
        root (str or ``pathlib.Path``): Root directory of the dataset.
        split (string, optional): The dataset split, supports ``"train"`` (default) and ``"test"``.
        transform (callable, optional): A function/transform that takes in a PIL image and returns a
            transformed version. E.g, ``transforms.RandomCrop``.
        target_transform (callable, optional): A function/transform that takes in the target and
            transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and puts it in
            the root directory. If the dataset is already downloaded, it is not downloaded again.
            Default is False.
    """

    _URL = "http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz"
    _MD5 = "85eeb15f3717b99a5da872d97d918f87"

    def __init__(
        self,
        root: Union[str, Path],
        split: str = "train",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super().__init__(root, transform=transform, target_transform=target_transform)
        self._split = verify_str_arg(split, "split", ("train", "test"))
        self._base_folder = Path(self.root) / "food-101"
        self._meta_folder = self._base_folder / "meta"
        self._images_folder = self._base_folder / "images"

        if download:
            self._download()

        if not self._check_exists():
            raise RuntimeError("Dataset not found. You can use download=True to download it")

        # The split metadata maps each class name to the '/'-separated relative
        # paths of that class's images.
        with open(self._meta_folder / f"{split}.json") as f:
            metadata = json.load(f)

        self.classes = sorted(metadata.keys())
        self.class_to_idx = {name: index for index, name in enumerate(self.classes)}

        self._labels = []
        self._image_files = []
        for class_name, rel_paths in metadata.items():
            class_index = self.class_to_idx[class_name]
            self._labels.extend([class_index] * len(rel_paths))
            # Splitting on '/' before joinpath keeps the paths correct on
            # every platform, regardless of the OS path separator.
            self._image_files.extend(
                self._images_folder.joinpath(*f"{rel_path}.jpg".split("/")) for rel_path in rel_paths
            )

    def __len__(self) -> int:
        """Return the number of samples in the selected split."""
        return len(self._image_files)

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """Load the sample at ``idx`` and return an ``(image, target)`` pair."""
        label = self._labels[idx]
        image = PIL.Image.open(self._image_files[idx]).convert("RGB")

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)

        return image, label

    def extra_repr(self) -> str:
        """Expose the active split in the dataset's ``repr``."""
        return f"split={self._split}"

    def _check_exists(self) -> bool:
        """Return True when both the metadata and image folders are present."""
        # Path.is_dir() is False for nonexistent paths, so no exists() check is needed.
        return all(folder.is_dir() for folder in (self._meta_folder, self._images_folder))

    def _download(self) -> None:
        """Download and extract the archive unless the dataset is already in place."""
        if self._check_exists():
            return
        download_and_extract_archive(self._URL, download_root=self.root, md5=self._MD5)