ShaswatRobotics committed on
Commit d4f5e32 · verified · 1 Parent(s): f9f6093

Delete delta-iris/src/data

delta-iris/src/data/__init__.py DELETED
@@ -1,7 +0,0 @@
-from .batch import Batch
-from .dataset import EpisodeDataset
-from .episode import Episode
-from .episode_count import EpisodeCountManager
-from .sampler import BatchSampler
-from .segment import SegmentId
-from .utils import collate_segments_to_batch, DatasetTraverser, make_segment
 
delta-iris/src/data/__pycache__/__init__.cpython-310.pyc DELETED
Binary file (543 Bytes)
 
delta-iris/src/data/__pycache__/batch.cpython-310.pyc DELETED
Binary file (1.46 kB)
 
delta-iris/src/data/__pycache__/dataset.cpython-310.pyc DELETED
Binary file (4.9 kB)
 
delta-iris/src/data/__pycache__/episode.cpython-310.pyc DELETED
Binary file (1.8 kB)
 
delta-iris/src/data/__pycache__/episode_count.cpython-310.pyc DELETED
Binary file (2.78 kB)
 
delta-iris/src/data/__pycache__/sampler.cpython-310.pyc DELETED
Binary file (1.96 kB)
 
delta-iris/src/data/__pycache__/segment.cpython-310.pyc DELETED
Binary file (1.06 kB)
 
delta-iris/src/data/__pycache__/utils.cpython-310.pyc DELETED
Binary file (3.98 kB)
 
delta-iris/src/data/batch.py DELETED
@@ -1,24 +0,0 @@
-from __future__ import annotations
-from dataclasses import dataclass
-from typing import List
-
-import torch
-
-from .segment import SegmentId
-
-
-@dataclass
-class Batch:
-    observations: torch.ByteTensor
-    actions: torch.LongTensor
-    rewards: torch.FloatTensor
-    ends: torch.LongTensor
-    mask_padding: torch.BoolTensor
-    segment_ids: List[SegmentId]
-
-    def pin_memory(self) -> Batch:
-        return Batch(**{k: v if k == 'segment_ids' else v.pin_memory() for k, v in self.__dict__.items()})
-
-    def to(self, device: torch.device) -> Batch:
-        return Batch(**{k: v if k == 'segment_ids' else v.to(device) for k, v in self.__dict__.items()})
-
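Note on what this removes: Batch is a plain device-movable container whose pin_memory/to skip the non-tensor segment_ids field. A minimal construction sketch; the tensor shapes and the src.data.* import paths are assumptions that presume the pre-deletion tree:

import torch
from src.data.batch import Batch         # assumed path; only valid before this deletion
from src.data.segment import SegmentId

b = Batch(
    observations=torch.zeros(2, 4, 3, 64, 64, dtype=torch.uint8),  # (batch, time, C, H, W) is an assumed layout
    actions=torch.zeros(2, 4, dtype=torch.long),
    rewards=torch.zeros(2, 4),
    ends=torch.zeros(2, 4, dtype=torch.long),
    mask_padding=torch.ones(2, 4, dtype=torch.bool),
    segment_ids=[SegmentId(0, 0, 4), SegmentId(1, 0, 4)],
)
if torch.cuda.is_available():
    b = b.pin_memory().to(torch.device('cuda'))  # tensor fields move; segment_ids pass through unchanged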
 
delta-iris/src/data/dataset.py DELETED
@@ -1,104 +0,0 @@
-from pathlib import Path
-import shutil
-from typing import Dict, Optional, Union
-
-import numpy as np
-import torch
-
-from .episode import Episode
-from .segment import Segment, SegmentId
-from .utils import make_segment
-
-
-class EpisodeDataset(torch.utils.data.Dataset):
-    def __init__(self, directory: Path, name: str) -> None:
-        super().__init__()
-        self.name = name
-        self.directory = Path(directory)
-        self.num_episodes, self.num_steps, self.start_idx, self.lengths = None, None, None, None
-
-        if not self.directory.is_dir():
-            self._init_empty()
-        else:
-            self._load_info()
-        print(f'({name}) {self.num_episodes} episodes, {self.num_steps} steps.')
-
-    @property
-    def info_path(self) -> Path:
-        return self.directory / 'info.pt'
-
-    @property
-    def info(self) -> Dict[str, Union[int, np.ndarray]]:
-        return {'num_episodes': self.num_episodes, 'num_steps': self.num_steps, 'start_idx': self.start_idx, 'lengths': self.lengths}
-
-    def __len__(self) -> int:
-        return self.num_steps
-
-    def __getitem__(self, segment_id: SegmentId) -> Segment:
-        return self._load_segment(segment_id)
-
-    def _init_empty(self) -> None:
-        self.directory.mkdir(parents=True, exist_ok=False)
-        self.num_episodes = 0
-        self.num_steps = 0
-        self.start_idx = np.array([], dtype=np.int64)
-        self.lengths = np.array([], dtype=np.int64)
-        self.save_info()
-
-    def _load_info(self) -> None:
-        info = torch.load(self.info_path)
-        self.num_steps = info['num_steps']
-        self.num_episodes = info['num_episodes']
-        self.start_idx = info['start_idx']
-        self.lengths = info['lengths']
-
-    def save_info(self) -> None:
-        torch.save(self.info, self.info_path)
-
-    def clear(self) -> None:
-        shutil.rmtree(self.directory)
-        self._init_empty()
-
-    def _get_episode_path(self, episode_id: int) -> Path:
-        n = 3  # number of hierarchies
-        powers = np.arange(n)
-        subfolders = list(map(int, np.floor((episode_id % 10 ** (1 + powers)) / 10 ** powers) * 10 ** powers))[::-1]
-        return self.directory / '/'.join(list(map(lambda x: f'{x[1]:0{n - x[0]}d}', enumerate(subfolders)))) / f'{episode_id}.pt'
-
-    def _load_segment(self, segment_id: SegmentId, should_pad: bool = True) -> Segment:
-        episode = self.load_episode(segment_id.episode_id)
-        return make_segment(episode, segment_id, should_pad)
-
-    def load_episode(self, episode_id: int) -> Episode:
-        return Episode(**torch.load(self._get_episode_path(episode_id)))
-
-    def add_episode(self, episode: Episode, *, episode_id: Optional[int] = None) -> int:
-        if episode_id is None:
-            episode_id = self.num_episodes
-            self.start_idx = np.concatenate((self.start_idx, np.array([self.num_steps])))
-            self.lengths = np.concatenate((self.lengths, np.array([len(episode)])))
-            self.num_steps += len(episode)
-            self.num_episodes += 1
-
-        else:
-            assert episode_id < self.num_episodes
-            old_episode = self.load_episode(episode_id)
-            episode = old_episode.merge(episode)
-            incr_num_steps = len(episode) - len(old_episode)
-            self.lengths[episode_id] = len(episode)
-            self.start_idx[episode_id + 1:] += incr_num_steps
-            self.num_steps += incr_num_steps
-
-        episode_path = self._get_episode_path(episode_id)
-        episode_path.parent.mkdir(parents=True, exist_ok=True)
-        torch.save(episode.__dict__, episode_path.with_suffix('.tmp'))
-        episode_path.with_suffix('.tmp').rename(episode_path)
-
-        return episode_id
-
-    def get_episode_id_from_global_idx(self, global_idx: np.ndarray) -> np.ndarray:
-        return (np.argmax(self.start_idx.reshape(-1, 1) > global_idx, axis=0) - 1) % self.num_episodes
-
-    def get_global_idx_from_segment_id(self, segment_id: SegmentId) -> np.ndarray:
-        start_idx = self.start_idx[segment_id.episode_id]
-        return np.arange(start_idx + segment_id.start, start_idx + segment_id.stop)
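The bucketing in _get_episode_path is the least obvious part of the deleted dataset: episode files are spread across up to 1000 leaf folders keyed by episode_id mod 1000, one decimal digit per level. A standalone re-derivation of the same arithmetic, for illustration only (the helper name and sample paths are not part of the commit):

import numpy as np
from pathlib import Path

def episode_path(directory: Path, episode_id: int, n: int = 3) -> Path:
    # Same arithmetic as the deleted _get_episode_path: bucket the last n decimal
    # digits of the id, most significant bucket first, zero-padded per level.
    powers = np.arange(n)
    subfolders = list(map(int, np.floor((episode_id % 10 ** (1 + powers)) / 10 ** powers) * 10 ** powers))[::-1]
    return directory / '/'.join(f'{x:0{n - i}d}' for i, x in enumerate(subfolders)) / f'{episode_id}.pt'

print(episode_path(Path('data'), 1234))  # data/200/30/4/1234.pt
print(episode_path(Path('data'), 7))     # data/000/00/7/7.pt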
 
delta-iris/src/data/episode.py DELETED
@@ -1,41 +0,0 @@
-from __future__ import annotations
-from dataclasses import dataclass
-
-import torch
-
-
-@dataclass
-class EpisodeMetrics:
-    episode_length: int
-    episode_return: float
-
-
-@dataclass
-class Episode:
-    observations: torch.ByteTensor
-    actions: torch.LongTensor
-    rewards: torch.FloatTensor
-    ends: torch.LongTensor
-
-    def __post_init__(self):
-        assert len(self.observations) == len(self.actions) == len(self.rewards) == len(self.ends)
-        if self.ends.sum() > 0:
-            idx_end = torch.argmax(self.ends) + 1
-            self.observations = self.observations[:idx_end]
-            self.actions = self.actions[:idx_end]
-            self.rewards = self.rewards[:idx_end]
-            self.ends = self.ends[:idx_end]
-
-    def __len__(self) -> int:
-        return self.observations.size(0)
-
-    def merge(self, other: Episode) -> Episode:
-        return Episode(
-            torch.cat((self.observations, other.observations), dim=0),
-            torch.cat((self.actions, other.actions), dim=0),
-            torch.cat((self.rewards, other.rewards), dim=0),
-            torch.cat((self.ends, other.ends), dim=0),
-        )
-
-    def compute_metrics(self) -> EpisodeMetrics:
-        return EpisodeMetrics(len(self), self.rewards.sum())
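Worth noting in the deleted Episode: __post_init__ silently truncates everything after the first end flag, so stored episodes never carry post-termination steps. A small check; tensor shapes are made up and the import presumes the pre-deletion tree:

import torch
from src.data.episode import Episode  # assumed path; only valid before this deletion

ep = Episode(
    observations=torch.zeros(5, 3, 64, 64, dtype=torch.uint8),
    actions=torch.zeros(5, dtype=torch.long),
    rewards=torch.tensor([0.0, 1.0, 0.0, 0.0, 0.0]),
    ends=torch.tensor([0, 0, 1, 0, 0]),  # termination at step 2
)
assert len(ep) == 3                      # steps after the first end were dropped
print(ep.compute_metrics())              # EpisodeMetrics(episode_length=3, episode_return=tensor(1.))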
 
delta-iris/src/data/episode_count.py DELETED
@@ -1,41 +0,0 @@
-from pathlib import Path
-from typing import Tuple
-
-import numpy as np
-import torch
-
-from .dataset import EpisodeDataset
-
-
-class EpisodeCountManager:
-    def __init__(self, dataset: EpisodeDataset) -> None:
-        self.dataset = dataset
-        self.all_counts = dict()
-
-    def load(self, path_to_checkpoint: Path) -> None:
-        self.all_counts = torch.load(path_to_checkpoint)
-        assert all([counts.shape[0] == self.dataset.num_episodes for counts in self.all_counts.values()])
-
-    def save(self, path_to_checkpoint: Path) -> None:
-        torch.save(self.all_counts, path_to_checkpoint)
-
-    def register(self, *keys: Tuple[str]) -> None:
-        assert all([key not in self.all_counts for key in keys])
-        self.all_counts.update({key: np.zeros(self.dataset.num_episodes, dtype=np.int64) for key in keys})
-
-    def add_episode(self, episode_id: int) -> None:
-        for key, counts in self.all_counts.items():
-            assert episode_id <= counts.shape[0]
-            if episode_id == counts.shape[0]:
-                self.all_counts[key] = np.concatenate((counts, np.zeros(1, dtype=np.int64)))
-            assert self.all_counts[key].shape[0] == self.dataset.num_episodes
-
-    def increment_episode_count(self, key: str, episode_id: int) -> None:
-        assert key in self.all_counts
-        self.all_counts[key][episode_id] += 1
-
-    def compute_probabilities(self, key: str, alpha: float) -> np.ndarray:
-        assert key in self.all_counts
-        inverse_counts = 1 / (1 + self.all_counts[key])
-        p = inverse_counts ** alpha
-        return p / p.sum()
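compute_probabilities implements inverse-count prioritisation: an episode already sampled c times gets weight (1 / (1 + c)) ** alpha, normalised over all episodes, so alpha = 0 recovers uniform sampling. A quick numeric check with made-up counts:

import numpy as np

counts = np.array([0, 1, 9])   # hypothetical per-episode sample counts
alpha = 1.0
p = (1 / (1 + counts)) ** alpha
p /= p.sum()
print(p)                       # [0.625 0.3125 0.0625]: rarely sampled episodes dominate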
 
delta-iris/src/data/sampler.py DELETED
@@ -1,42 +0,0 @@
-from typing import Generator, List
-
-import numpy as np
-import torch
-
-from .dataset import EpisodeDataset
-from .segment import SegmentId
-
-
-class BatchSampler(torch.utils.data.Sampler):
-    def __init__(self, dataset: EpisodeDataset, num_steps_per_epoch: int, batch_size: int, sequence_length: int, can_sample_beyond_end: bool) -> None:
-        super().__init__(dataset)
-        self.dataset = dataset
-        self.probabilities = None
-        self.num_steps_per_epoch = num_steps_per_epoch
-        self.batch_size = batch_size
-        self.sequence_length = sequence_length
-        self.can_sample_beyond_end = can_sample_beyond_end
-
-    def __len__(self) -> int:
-        return self.num_steps_per_epoch
-
-    def __iter__(self) -> Generator[List[SegmentId], None, None]:
-        for _ in range(self.num_steps_per_epoch):
-            yield self.sample()
-
-    def sample(self) -> List[SegmentId]:
-        episode_ids = np.random.choice(np.arange(self.dataset.num_episodes), size=self.batch_size, replace=True, p=self.probabilities)
-        timesteps = np.random.randint(low=0, high=self.dataset.lengths[episode_ids])
-
-        # padding allowed, both before start and after end
-        if self.can_sample_beyond_end:
-            starts = timesteps - np.random.randint(0, self.sequence_length, len(timesteps))
-            stops = starts + self.sequence_length
-
-        # padding allowed only before start
-        else:
-            stops = np.minimum(self.dataset.lengths[episode_ids], timesteps + 1 + np.random.randint(0, self.sequence_length, len(timesteps)))
-            starts = stops - self.sequence_length
-
-        return list(map(lambda x: SegmentId(*x), zip(episode_ids, starts, stops)))
-
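The two branches of sample differ only in how the window is clamped: the second branch never runs past the episode end, but the window can still start before step 0. A deterministic walk-through with made-up numbers:

# Concrete numbers for the 'padding only before start' branch (all values hypothetical).
length, sequence_length, timestep = 10, 8, 2
offset = 3                                  # stands in for np.random.randint(0, sequence_length)
stop = min(length, timestep + 1 + offset)   # min(10, 6) = 6: never beyond the episode end
start = stop - sequence_length              # 6 - 8 = -2, so make_segment left-pads two steps
print(start, stop)                          # -2 6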
 
delta-iris/src/data/segment.py DELETED
@@ -1,25 +0,0 @@
-from __future__ import annotations
-from dataclasses import dataclass
-
-import torch
-
-
-@dataclass
-class SegmentId:
-    episode_id: int
-    start: int
-    stop: int
-
-
-@dataclass
-class Segment:
-    observations: torch.ByteTensor
-    actions: torch.LongTensor
-    rewards: torch.FloatTensor
-    ends: torch.LongTensor
-    mask_padding: torch.BoolTensor
-    id: SegmentId
-
-    @property
-    def effective_size(self) -> int:
-        return self.mask_padding.sum().item()
 
delta-iris/src/data/utils.py DELETED
@@ -1,69 +0,0 @@
-import math
-from typing import Generator, List
-
-import torch
-
-from .batch import Batch
-from .episode import Episode
-from .segment import Segment, SegmentId
-
-
-def collate_segments_to_batch(segments: List[Segment]) -> Batch:
-    return Batch(
-        torch.stack(list(map(lambda s: s.observations, segments))).div(255),
-        torch.stack(list(map(lambda s: s.actions, segments))),
-        torch.stack(list(map(lambda s: s.rewards, segments))),
-        torch.stack(list(map(lambda s: s.ends, segments))),
-        torch.stack(list(map(lambda s: s.mask_padding, segments))),
-        list(map(lambda segment: segment.id, segments))
-    )
-
-
-def make_segment(episode: Episode, segment_id: SegmentId, should_pad: bool = True) -> Segment:
-    assert segment_id.start < len(episode) and segment_id.stop > 0 and segment_id.start < segment_id.stop
-    padding_length_right = max(0, segment_id.stop - len(episode))
-    padding_length_left = max(0, -segment_id.start)
-    assert padding_length_right == padding_length_left == 0 or should_pad
-
-    def pad(x):
-        pad_right = torch.nn.functional.pad(x, [0 for _ in range(2 * x.ndim - 1)] + [padding_length_right]) if padding_length_right > 0 else x
-        return torch.nn.functional.pad(pad_right, [0 for _ in range(2 * x.ndim - 2)] + [padding_length_left, 0]) if padding_length_left > 0 else pad_right
-
-    start = max(0, segment_id.start)
-    stop = min(len(episode), segment_id.stop)
-
-    return Segment(
-        pad(episode.observations[start:stop]),
-        pad(episode.actions[start:stop]),
-        pad(episode.rewards[start:stop]),
-        pad(episode.ends[start:stop]),
-        mask_padding=torch.cat((torch.zeros(padding_length_left), torch.ones(stop - start), torch.zeros(padding_length_right))).bool(),
-        id=SegmentId(segment_id.episode_id, start, stop)
-    )
-
-
-class DatasetTraverser:
-    def __init__(self, dataset, batch_num_samples: int, chunk_size: int) -> None:
-        self.dataset = dataset
-        self.batch_num_samples = batch_num_samples
-        self.chunk_size = chunk_size
-        self._num_batches = math.ceil(sum([math.ceil(dataset.lengths[episode_id] / chunk_size) - int(dataset.lengths[episode_id] % chunk_size == 1) for episode_id in range(dataset.num_episodes)]) / batch_num_samples)
-
-    def __len__(self) -> int:
-        return self._num_batches
-
-    def __iter__(self) -> Generator[Batch, None, None]:
-        chunks = []
-
-        for episode_id in range(self.dataset.num_episodes):
-            episode = self.dataset.load_episode(episode_id)
-            chunks.extend(make_segment(episode, SegmentId(episode_id, start=i * self.chunk_size, stop=(i + 1) * self.chunk_size), should_pad=True) for i in range(math.ceil(len(episode) / self.chunk_size)))
-            if chunks[-1].effective_size < 2:
-                chunks.pop()
-
-            while len(chunks) >= self.batch_num_samples:
-                yield collate_segments_to_batch(chunks[:self.batch_num_samples])
-                chunks = chunks[self.batch_num_samples:]
-
-        if len(chunks) > 0:
-            yield collate_segments_to_batch(chunks)
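The left/right padding contract between make_segment and SegmentId is easiest to see on a concrete call; the shapes are made up and the imports presume the pre-deletion tree:

import torch
from src.data.episode import Episode
from src.data.segment import SegmentId
from src.data.utils import make_segment

ep = Episode(
    observations=torch.zeros(5, 3, 64, 64, dtype=torch.uint8),
    actions=torch.zeros(5, dtype=torch.long),
    rewards=torch.zeros(5),
    ends=torch.zeros(5, dtype=torch.long),
)
seg = make_segment(ep, SegmentId(episode_id=0, start=-2, stop=4), should_pad=True)
print(seg.mask_padding)    # tensor([False, False, True, True, True, True]): two left-padded steps
print(seg.effective_size)  # 4
print(seg.id)              # SegmentId(episode_id=0, start=0, stop=4): clipped to the valid range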