import json

import datasets
|
|
|
|
|
# Human-readable summary passed to ``datasets.DatasetInfo(description=...)``.
_DESCRIPTION = """
TanaData is a custom dataset for instruction-response tasks.
"""

# BibTeX entry passed to ``datasets.DatasetInfo(citation=...)``.
_CITATION = """
@misc{tanadata2025,
title={TanaData Dataset},
year={2025},
note={Custom dataset hosted on Hugging Face}
}
"""
|
|
|
class TanaData(datasets.GeneratorBasedBuilder):
    """Dataset builder for TanaData instruction/input/output records.

    Downloads a single JSON file and exposes its records as one ``train``
    split whose examples have three string features: ``instruction``,
    ``input`` and ``output``.
    """

    VERSION = datasets.Version("1.0.0")

    # Single source of truth for the example schema; used both to declare the
    # features and to project raw JSON records onto them.
    _FEATURE_NAMES = ("instruction", "input", "output")

    def _info(self):
        """Return the dataset metadata (description, features, homepage, citation)."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {name: datasets.Value("string") for name in self._FEATURE_NAMES}
            ),
            supervised_keys=None,
            homepage="https://huggingface.co/mdevoz/tanadata",
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download the JSON data file and declare the single ``train`` split.

        Args:
            dl_manager: Download manager supplied by ``datasets``; only its
                ``download_and_extract`` method is used.

        Returns:
            A one-element list holding the ``train`` ``SplitGenerator``, whose
            ``gen_kwargs`` pass the local file path to ``_generate_examples``.
        """
        # NOTE(review): Hub dataset repositories are normally served under
        # ``/datasets/<user>/<repo>/resolve/...``; this URL has no ``datasets/``
        # segment -- confirm the file really lives at this location.
        file_path = dl_manager.download_and_extract(
            "https://huggingface.co/mdevoz/tanadata/resolve/main/tana_z.json"
        )
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={"filepath": file_path},
            )
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs from the downloaded JSON file.

        The whole file is parsed with ``json.load`` and then iterated; each
        record is projected onto the declared feature names, so a key that is
        absent from a record becomes ``None`` and keys outside the schema are
        dropped.

        Args:
            filepath: Path of the local JSON file to read (UTF-8).

        Yields:
            Tuples of the record's position in the file and a dict with exactly
            the keys ``instruction``, ``input`` and ``output``.

        Raises:
            ValueError: If an iterated item is not a JSON object (``dict``).
        """
        # Parse everything up front so the file handle is closed before any
        # example is yielded, even if the consumer never exhausts the generator.
        with open(filepath, encoding="utf-8") as f:
            data = json.load(f)

        for idx, example in enumerate(data):
            if not isinstance(example, dict):
                raise ValueError(
                    f"Record {idx} in {filepath} is not a JSON object "
                    f"(got {type(example).__name__})"
                )
            # Emit exactly the declared columns so the result does not depend
            # on how the installed ``datasets`` release treats examples whose
            # keys differ from the declared features.
            yield idx, {name: example.get(name) for name in self._FEATURE_NAMES}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|