mdevoz
/

tanadata

Transformers

Safetensors

unsloth

Model card Files and versions

xet

Community

mdevoz committed on Mar 23, 2025

Commit

416fa7f

verified ·

1 Parent(s): 3367357

Update tanadata.py

Browse files

Files changed (1) hide show

tanadata.py +57 -57

tanadata.py CHANGED Viewed

@@ -1,57 +1,57 @@
-import json
-import datasets
-# You can update these with more detailed information.
-_DESCRIPTION = """
-TanaData is a custom dataset for instruction-response tasks.
-"""
-_CITATION = """
-@misc{tanadata2025,
-  title={TanaData Dataset},
-  year={2025},
-  note={Custom dataset hosted on Hugging Face}
-}
-"""
-class TanaData(datasets.GeneratorBasedBuilder):
-    VERSION = datasets.Version("1.0.0")
-    def _info(self):
-        return datasets.DatasetInfo(
-            description=_DESCRIPTION,
-            features=datasets.Features({
-                "instruction": datasets.Value("string"),
-                "input": datasets.Value("string"),
-                "output": datasets.Value("string"),
-            }),
-            supervised_keys=None,
-            homepage="https://huggingface.co/mdevoz/tanadata",
-            citation=_CITATION,
-        )
-    def _split_generators(self, dl_manager):
-        # This URL points to your JSON file in the repository.
-        file_path = dl_manager.download_and_extract(
-            "https://huggingface.co/mdevoz/tanadata/resolve/main/tana_z.json"
-        )
-        return [
-            datasets.SplitGenerator(
-                name=datasets.Split.TRAIN,
-                gen_kwargs={"filepath": file_path}
-            )
-        ]
-    def _generate_examples(self, filepath):
-        # Adjust this logic based on your JSON file structure.
-        with open(filepath, encoding="utf-8") as f:
-            # If your file is a JSON array of examples:
-            data = json.load(f)
-            for idx, example in enumerate(data):
-                yield idx, example
-# For testing, you can uncomment the following lines locally:
-# if __name__ == "__main__":
-#     from datasets import load_dataset
-#     dataset = load_dataset(__file__, name="tanadata")
-#     print(dataset)

+import json
+import datasets
+# You can update these with more detailed information.
+_DESCRIPTION = """
+TanaData is a custom dataset for instruction-response tasks.
+"""
+_CITATION = """
+@misc{tanadata2025,
+  title={TanaData Dataset},
+  year={2025},
+  note={Custom dataset hosted on Hugging Face}
+}
+"""
+class TanaData(datasets.GeneratorBasedBuilder):
+    VERSION = datasets.Version("1.0.0")
+    def _info(self):
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=datasets.Features({
+                "instruction": datasets.Value("string"),
+                "input": datasets.Value("string"),
+                "output": datasets.Value("string"),
+            }),
+            supervised_keys=None,
+            homepage="https://huggingface.co/mdevoz/tanadata",
+            citation=_CITATION,
+        )
+    def _split_generators(self, dl_manager):
+        # This URL points to your JSON file in the repository.
+        file_path = dl_manager.download_and_extract(
+            "https://huggingface.co/mdevoz/tanadata/resolve/main/tanadata.json"
+        )
+        return [
+            datasets.SplitGenerator(
+                name=datasets.Split.TRAIN,
+                gen_kwargs={"filepath": file_path}
+            )
+        ]
+    def _generate_examples(self, filepath):
+        # Adjust this logic based on your JSON file structure.
+        with open(filepath, encoding="utf-8") as f:
+            # If your file is a JSON array of examples:
+            data = json.load(f)
+            for idx, example in enumerate(data):
+                yield idx, example
+# For testing, you can uncomment the following lines locally:
+# if __name__ == "__main__":
+#     from datasets import load_dataset
+#     dataset = load_dataset(__file__, name="tanadata")
+#     print(dataset)