Fill-Mask
Transformers
code
iamthe66epitaph's picture
Upload folder using huggingface_hub
8193465 verified
raw
history blame
1 kB
from dataclasses import dataclass
from typing import Callable, Optional
import datasets
@dataclass
class GeneratorConfig(datasets.BuilderConfig):
generator: Optional[Callable] = None
gen_kwargs: Optional[dict] = None
features: Optional[datasets.Features] = None
split: datasets.NamedSplit = datasets.Split.TRAIN
def __post_init__(self):
super().__post_init__()
if self.generator is None:
raise ValueError("generator must be specified")
if self.gen_kwargs is None:
self.gen_kwargs = {}
class Generator(datasets.GeneratorBasedBuilder):
BUILDER_CONFIG_CLASS = GeneratorConfig
def _info(self):
return datasets.DatasetInfo(features=self.config.features)
def _split_generators(self, dl_manager):
return [datasets.SplitGenerator(name=self.config.split, gen_kwargs=self.config.gen_kwargs)]
def _generate_examples(self, **gen_kwargs):
yield from enumerate(self.config.generator(**gen_kwargs))