Yesh05
/

fantecchi

Model card Files Files and versions

fantecchi / fantecchi_dataset.py

Yesh05's picture

Initial commit with LFS support for images

9a8d870 about 1 month ago

history blame contribute delete

2.69 kB

	import json
	import os
	import datasets

	# Citation text handed to `datasets.DatasetInfo(citation=...)` by the builder.
	# NOTE(review): `@internal` is not one of the standard BibTeX entry types
	# (e.g. `@misc`) — confirm this is intentional before relying on BibTeX tooling.
	_CITATION = """\
	@internal{fantecchi2026,
	author = {नग्नाक्षी (Nagnākṣī)},
	title = {नानायोनि-कामभेद-संग्रहः (Nānāyoni-Kāmabheda-Saṅgrahaḥ)},
	year = {2026},
	publisher = {Fantecchi Project}
	}
	"""

	# Human-readable summary handed to `datasets.DatasetInfo(description=...)`.
	_DESCRIPTION = """\
	A comprehensive dataset of biological and cultural descriptions for fantasy races,
	focusing on unique anatomy, reproductive biology, and cultural customs.
	"""

	# Homepage is left empty: no project URL is recorded here.
	_HOMEPAGE = ""
	# License identifier passed through to `datasets.DatasetInfo(license=...)`.
	_LICENSE = "mit"

	class FantecchiDataset(datasets.GeneratorBasedBuilder):
	    """Builder that flattens per-race JSON files into a single TRAIN split.

	    Each JSON file is read as an object with an optional ``race`` name and
	    two optional lists, ``dataset`` and ``artistic_excerpts``. Every entry
	    of those lists becomes one example holding the race, the ``safe`` text,
	    the ``nsfw`` text (falling back to an ``unsafe`` key) and the name of
	    the list the entry came from.
	    """

	    VERSION = datasets.Version("1.0.0")

	    def _info(self):
	        """Return the dataset metadata together with its four string features."""
	        features = datasets.Features({
	            "race": datasets.Value("string"),
	            "safe": datasets.Value("string"),
	            "nsfw": datasets.Value("string"),
	            "category": datasets.Value("string"),  # 'dataset' or 'artistic_excerpts'
	        })
	        return datasets.DatasetInfo(
	            description=_DESCRIPTION,
	            features=features,
	            homepage=_HOMEPAGE,
	            license=_LICENSE,
	            citation=_CITATION,
	        )

	    def _split_generators(self, dl_manager):
	        """Collect the JSON files to load and expose them as the TRAIN split.

	        Args:
	            dl_manager: Download manager supplied by the ``datasets``
	                framework. It is not used; files are read from local disk.

	        Returns:
	            A one-element list containing the TRAIN ``SplitGenerator`` whose
	            ``filepaths`` keyword lists every selected JSON file.

	        Raises:
	            FileNotFoundError: If the data directory does not exist.
	        """
	        # NOTE(review): this is a relative path, so it is resolved against the
	        # current working directory of the calling process — confirm that is
	        # where the data is expected to live.
	        data_dir = os.path.join("data", "set5__culture")
	        # os.listdir() returns entries in arbitrary order. Sorting keeps the
	        # example order, and therefore the generated keys, reproducible.
	        files = sorted(
	            name
	            for name in os.listdir(data_dir)
	            if name.endswith(".json") and "multi_race" not in name
	        )
	        return [
	            datasets.SplitGenerator(
	                name=datasets.Split.TRAIN,
	                gen_kwargs={
	                    "filepaths": [os.path.join(data_dir, name) for name in files],
	                },
	            ),
	        ]

	    def _generate_examples(self, filepaths):
	        """Yield ``(key, example)`` pairs for every entry in every file.

	        Args:
	            filepaths: Paths of the JSON files to read, processed in order.

	        Yields:
	            A running integer key (starting at 0 and shared across all files)
	            and a dict with the ``race``, ``safe``, ``nsfw`` and ``category``
	            fields.
	        """
	        # Top-level JSON keys that hold example lists; the key name is also
	        # the value stored in the "category" feature.
	        categories = ("dataset", "artistic_excerpts")
	        key = 0
	        for filepath in filepaths:
	            # The handle is only needed while parsing, so close it right away.
	            with open(filepath, encoding="utf-8") as f:
	                data = json.load(f)
	            race = data.get("race", "Unknown")

	            for category in categories:
	                # `or []` covers both a missing key and a key set to null.
	                for item in data.get(category) or []:
	                    yield key, {
	                        "race": race,
	                        "safe": item.get("safe", ""),
	                        # Older entries may store the text under "unsafe".
	                        "nsfw": item.get("nsfw", item.get("unsafe", "")),
	                        "category": category,
	                    }
	                    key += 1