Spaces:
Runtime error
Runtime error
Commit
·
2d84a88
1
Parent(s):
e1cb58c
add task categories
Browse files
src/synthetic_dataset_generator/_distiset.py
CHANGED
|
@@ -81,6 +81,15 @@ class CustomDistisetWithAdditionalTag(distilabel.distiset.Distiset):
|
|
| 81 |
dataset[0] if not isinstance(dataset, dict) else dataset["train"][0]
|
| 82 |
)
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
readme_metadata = {}
|
| 85 |
if repo_id and token:
|
| 86 |
readme_metadata = self._extract_readme_metadata(repo_id, token)
|
|
@@ -90,6 +99,7 @@ class CustomDistisetWithAdditionalTag(distilabel.distiset.Distiset):
|
|
| 90 |
"size_categories": size_categories_parser(
|
| 91 |
max(len(dataset) for dataset in self.values())
|
| 92 |
),
|
|
|
|
| 93 |
"tags": [
|
| 94 |
"synthetic",
|
| 95 |
"distilabel",
|
|
|
|
| 81 |
dataset[0] if not isinstance(dataset, dict) else dataset["train"][0]
|
| 82 |
)
|
| 83 |
|
| 84 |
+
keys = list(sample_records.keys())
|
| 85 |
+
if len(keys) != 2 or not (
|
| 86 |
+
("label" in keys and "text" in keys)
|
| 87 |
+
or ("labels" in keys and "text" in keys)
|
| 88 |
+
):
|
| 89 |
+
task_categories = ["text-classification"]
|
| 90 |
+
elif "prompt" in keys or "messages" in keys:
|
| 91 |
+
task_categories = ["text-generation", "text2text-generation"]
|
| 92 |
+
|
| 93 |
readme_metadata = {}
|
| 94 |
if repo_id and token:
|
| 95 |
readme_metadata = self._extract_readme_metadata(repo_id, token)
|
|
|
|
| 99 |
"size_categories": size_categories_parser(
|
| 100 |
max(len(dataset) for dataset in self.values())
|
| 101 |
),
|
| 102 |
+
"task_categories": task_categories,
|
| 103 |
"tags": [
|
| 104 |
"synthetic",
|
| 105 |
"distilabel",
|