Delete tanadata.py
Browse files- tanadata.py +0 -57
tanadata.py
DELETED
|
@@ -1,57 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import datasets
|
| 3 |
-
|
| 4 |
-
# You can update these with more detailed information.
|
| 5 |
-
_DESCRIPTION = """
|
| 6 |
-
TanaData is a custom dataset for instruction-response tasks.
|
| 7 |
-
"""
|
| 8 |
-
|
| 9 |
-
_CITATION = """
|
| 10 |
-
@misc{tanadata2025,
|
| 11 |
-
title={TanaData Dataset},
|
| 12 |
-
year={2025},
|
| 13 |
-
note={Custom dataset hosted on Hugging Face}
|
| 14 |
-
}
|
| 15 |
-
"""
|
| 16 |
-
|
| 17 |
-
class TanaData(datasets.GeneratorBasedBuilder):
    """Dataset builder exposing instruction/input/output string records.

    The builder downloads a single JSON file and serves its contents as one
    ``train`` split.
    """

    VERSION = datasets.Version("1.0.0")

    def _info(self):
        """Return the dataset metadata and the three-column string schema."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    "instruction": datasets.Value("string"),
                    "input": datasets.Value("string"),
                    "output": datasets.Value("string"),
                }
            ),
            supervised_keys=None,
            homepage="https://huggingface.co/mdevoz/tanadata",
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Fetch the JSON file and declare a single TRAIN split.

        Args:
            dl_manager: Download manager supplied by the ``datasets``
                library; only ``download_and_extract`` is used.

        Returns:
            A one-element list holding the TRAIN ``SplitGenerator`` whose
            ``gen_kwargs`` carry the fetched file's path as ``filepath``.
        """
        # This URL points to your JSON file in the repository.
        # NOTE(review): dataset repositories are usually served under
        # ".../datasets/<user>/<repo>/resolve/..." -- confirm this URL resolves.
        file_path = dl_manager.download_and_extract(
            "https://huggingface.co/mdevoz/tanadata/resolve/main/tanadata.json"
        )
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={"filepath": file_path},
            )
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs from a JSON array file.

        Args:
            filepath: Path to a UTF-8 JSON file whose top-level value is an
                array of example objects.

        Yields:
            Tuples of the example's position in the array and the example
            itself, unchanged.

        Raises:
            ValueError: If the top-level JSON value is not an array.
        """
        # Adjust this logic based on your JSON file structure.
        with open(filepath, encoding="utf-8") as f:
            data = json.load(f)

        # Iterating a JSON object would yield its keys as bogus examples;
        # fail early with a message that names the actual problem.
        if not isinstance(data, list):
            raise ValueError(
                f"Expected a JSON array of examples in {filepath!r}, "
                f"got {type(data).__name__}"
            )

        for idx, example in enumerate(data):
            yield idx, example
|
| 52 |
-
|
| 53 |
-
# For testing, you can uncomment the following lines locally:
# if __name__ == "__main__":
#     from datasets import load_dataset
#     dataset = load_dataset(__file__, name="tanadata")
#     print(dataset)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|