DeepSolanaCoder
/
DeepSeek-Coder-main
/finetune
/venv
/lib
/python3.12
/site-packages
/datasets
/io
/text.py
| from typing import Optional | |
| from .. import Features, NamedSplit | |
| from ..packaged_modules.text.text import Text | |
| from ..utils.typing import NestedDataStructureLike, PathLike | |
| from .abc import AbstractDatasetReader | |
class TextDatasetReader(AbstractDatasetReader):
    """Dataset reader that delegates to the packaged ``Text`` builder.

    The constructor creates a ``Text`` builder for the given file path(s);
    :meth:`read` then asks that builder for either a streaming dataset or a
    prepared (map-style) dataset.
    """

    def __init__(
        self,
        path_or_paths: NestedDataStructureLike[PathLike],
        split: Optional[NamedSplit] = None,
        features: Optional[Features] = None,
        cache_dir: str = None,
        keep_in_memory: bool = False,
        streaming: bool = False,
        num_proc: Optional[int] = None,
        **kwargs,
    ):
        """Initialise the reader and create the underlying ``Text`` builder.

        Args:
            path_or_paths: File path(s) to read. A ``dict`` is handed to the
                builder unchanged; anything else is wrapped in a one-entry
                dict keyed by ``self.split``.
            split: Forwarded to the base-class initialiser.
            features: Forwarded to the base-class initialiser and to ``Text``.
            cache_dir: Forwarded to the base-class initialiser and to ``Text``.
            keep_in_memory: Forwarded to the base-class initialiser.
            streaming: Forwarded to the base-class initialiser.
            num_proc: Forwarded to the base-class initialiser.
            **kwargs: Extra keyword arguments, forwarded to both the
                base-class initialiser and ``Text``.
        """
        super().__init__(
            path_or_paths,
            split=split,
            features=features,
            cache_dir=cache_dir,
            keep_in_memory=keep_in_memory,
            streaming=streaming,
            num_proc=num_proc,
            **kwargs,
        )

        # The builder receives a mapping of data files. ``self.split`` is
        # read only after the base initialiser has run (presumably that is
        # where it gets assigned -- confirm against AbstractDatasetReader).
        if isinstance(path_or_paths, dict):
            data_files = path_or_paths
        else:
            data_files = {self.split: path_or_paths}

        self.builder = Text(
            cache_dir=cache_dir,
            data_files=data_files,
            features=features,
            **kwargs,
        )

    def read(self):
        """Build and return the dataset produced by ``self.builder``.

        Returns:
            The result of ``builder.as_streaming_dataset`` when
            ``self.streaming`` is truthy; otherwise the result of
            ``builder.as_dataset`` after ``builder.download_and_prepare``
            has been called.
        """
        # Iterable dataset: nothing is prepared up front.
        if self.streaming:
            return self.builder.as_streaming_dataset(split=self.split)

        # Regular (map-style) dataset: prepare first, then load. Every
        # download/verification option is passed explicitly as ``None``.
        self.builder.download_and_prepare(
            download_config=None,
            download_mode=None,
            verification_mode=None,
            base_path=None,
            num_proc=self.num_proc,
        )
        return self.builder.as_dataset(
            split=self.split,
            verification_mode=None,
            in_memory=self.keep_in_memory,
        )