Yesh05
/

fantecchi

Model card Files Files and versions

fantecchi / core /application_layer /documentation /generate_dictionary_md.py

Yesh05's picture

Initial commit with LFS support for images

9a8d870 19 days ago

history blame contribute delete

2.47 kB

	import os
	import json
	import glob

	def generate_dictionary(
	    dataset_dir='../../../data/set5__culture',
	    output_file='../docs/dictionary/dictionary_nanayoni.md',
	):
	    """Build a consolidated Markdown dictionary from per-race JSON files.

	    Every ``*.json`` file directly inside *dataset_dir* is read, except files
	    whose name contains ``multi_race`` (skipped to avoid duplicated terms).
	    Each file is expected to hold a JSON object with an optional ``race``
	    name and an optional ``dictionary`` object that maps a category
	    (``anatomy``, ``biology``, ``customs``, ``vulgar_slang``) to a
	    ``{term: meaning}`` mapping. Terms are merged across files: the meaning
	    from the first file (in sorted path order) that defines a term is kept,
	    and every race that defines the term is listed.

	    Args:
	        dataset_dir: Directory holding the per-race JSON files. A relative
	            path is resolved against the current working directory.
	        output_file: Path of the Markdown file to write. Missing parent
	            directories are created. A relative path is resolved against
	            the current working directory.

	    Returns:
	        None. The result is written to *output_file* and the path is printed.
	    """
	    categories = ('anatomy', 'biology', 'customs', 'vulgar_slang')

	    # term -> {'meaning': first meaning seen, 'races': set of race names}
	    consolidated_dict = {}

	    # glob.glob() yields paths in arbitrary order; sort them so that the
	    # "first meaning wins" rule gives the same output on every run/platform.
	    # glob.escape() keeps special characters in the directory name literal.
	    pattern = os.path.join(glob.escape(dataset_dir), '*.json')
	    for filepath in sorted(glob.glob(pattern)):
	        filename = os.path.basename(filepath)
	        # Test the file name only: a parent directory that happens to contain
	        # 'multi_race' must not cause every file to be skipped.
	        if 'multi_race' in filename:
	            continue

	        with open(filepath, 'r', encoding='utf-8') as f:
	            data = json.load(f)

	        # Fall back to the capitalized file stem when no race name is given.
	        race_name = data.get('race', filename.split('.')[0].capitalize())
	        dictionary = data.get('dictionary', {})

	        for category in categories:
	            for term, meaning in dictionary.get(category, {}).items():
	                # Later files never overwrite an existing meaning; they only
	                # add their race to the term's "applies to" set.
	                entry = consolidated_dict.setdefault(
	                    term, {'meaning': meaning, 'races': set()}
	                )
	                entry['races'].add(race_name)

	    markdown_lines = [
	        "# “नानायोनि-कामभेद-संग्रहः” (Nānāyoni-Kāmabheda-Saṅgrahaḥ)",
	        "## “A Compendium of Erotic Variations Across Many Forms”",
	        "Authored by नग्नाक्षी (Nagnākṣī)",
	        "",
	        "---",
	        "",
	    ]

	    # One entry per term, alphabetically, each followed by a blank line.
	    for term in sorted(consolidated_dict):
	        entry = consolidated_dict[term]
	        races_str = ", ".join(sorted(entry['races']))
	        markdown_lines.append(
	            f"{term}:{entry['meaning']} Applies to: {races_str}."
	        )
	        markdown_lines.append("")

	    # Create the destination directory so a fresh checkout does not fail.
	    output_dir = os.path.dirname(output_file)
	    if output_dir:
	        os.makedirs(output_dir, exist_ok=True)

	    with open(output_file, 'w', encoding='utf-8') as f:
	        f.write("\n".join(markdown_lines))

	    print(f"Generated dictionary markdown at {output_file}")

	# Script entry point: regenerate the dictionary only when this file is
	# executed directly, not when it is imported as a module.
	if __name__ == "__main__":
	    generate_dictionary()