File size: 1,002 Bytes
8193465 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
from dataclasses import dataclass
from typing import Callable, Optional
import datasets
@dataclass
class GeneratorConfig(datasets.BuilderConfig):
generator: Optional[Callable] = None
gen_kwargs: Optional[dict] = None
features: Optional[datasets.Features] = None
split: datasets.NamedSplit = datasets.Split.TRAIN
def __post_init__(self):
super().__post_init__()
if self.generator is None:
raise ValueError("generator must be specified")
if self.gen_kwargs is None:
self.gen_kwargs = {}
class Generator(datasets.GeneratorBasedBuilder):
BUILDER_CONFIG_CLASS = GeneratorConfig
def _info(self):
return datasets.DatasetInfo(features=self.config.features)
def _split_generators(self, dl_manager):
return [datasets.SplitGenerator(name=self.config.split, gen_kwargs=self.config.gen_kwargs)]
def _generate_examples(self, **gen_kwargs):
yield from enumerate(self.config.generator(**gen_kwargs))
|