fantecchi / core /application_layer /documentation /generate_dictionary_md.py
Yesh05's picture
Initial commit with LFS support for images
9a8d870
import os
import json
import glob
# Dictionary sections that are merged into the consolidated output.
_CATEGORIES = ('anatomy', 'biology', 'customs', 'vulgar_slang')


def _collect_terms(dataset_dir):
    """Merge the dictionary sections of every per-race JSON file in *dataset_dir*.

    Returns a mapping ``term -> {'meaning': ..., 'races': set_of_race_names}``.
    """
    consolidated = {}
    # glob's result order depends on the file system. Sorting makes the
    # "first meaning found wins" rule below reproducible across machines.
    for filepath in sorted(glob.glob(os.path.join(dataset_dir, '*.json'))):
        filename = os.path.basename(filepath)
        # Skip the multi-race dataset to avoid duplicating the individual
        # race files. Only the file name is tested so that a directory whose
        # name happens to contain 'multi_race' does not exclude everything.
        if 'multi_race' in filename:
            continue

        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Fall back to the capitalized file stem when the file names no race.
        race_name = data.get('race', filename.split('.')[0].capitalize())
        # ``or {}`` also tolerates an explicit JSON null for a section.
        dictionary = data.get('dictionary') or {}

        for category in _CATEGORIES:
            for term, meaning in (dictionary.get(category) or {}).items():
                # The first meaning encountered for a term is kept; later
                # files only contribute their race name.
                if term not in consolidated:
                    consolidated[term] = {'meaning': meaning, 'races': set()}
                consolidated[term]['races'].add(race_name)
    return consolidated


def _render_markdown(consolidated):
    """Render the consolidated term mapping as the dictionary markdown text."""
    markdown_lines = [
        "# “नानायोनि-कामभेद-संग्रहः” (Nānāyoni-Kāmabheda-Saṅgrahaḥ)",
        "## “A Compendium of Erotic Variations Across Many Forms”",
        "**Authored by नग्नाक्षी (Nagnākṣī)**",
        "",
        "---",
        "",
    ]
    # Terms are listed alphabetically; each entry is followed by a blank line.
    for term in sorted(consolidated):
        entry = consolidated[term]
        races_str = ", ".join(sorted(entry['races']))
        markdown_lines.append(
            f"**{term}**:{entry['meaning']} Applies to: {races_str}."
        )
        markdown_lines.append("")
    return "\n".join(markdown_lines)


def generate_dictionary(dataset_dir='../../../data/set5__culture',
                        output_file='../docs/dictionary/dictionary_nanayoni.md'):
    """Generate the consolidated dictionary markdown from per-race JSON files.

    Every ``*.json`` file directly inside *dataset_dir* is read, except files
    whose name contains ``multi_race``. Terms from the ``anatomy``,
    ``biology``, ``customs`` and ``vulgar_slang`` sections of each file's
    ``dictionary`` object are merged: the first meaning found for a term is
    kept (files are processed in sorted path order), and the race names of all
    files that define the term are collected.

    Args:
        dataset_dir: Directory holding the per-race JSON files. A relative
            path is resolved against the current working directory.
        output_file: Path of the markdown file to write (UTF-8). Its parent
            directory must already exist.

    Returns:
        None. The markdown is written to *output_file* and a confirmation
        message is printed.

    Raises:
        OSError: If an input file cannot be read or *output_file* cannot be
            written.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    consolidated = _collect_terms(dataset_dir)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(_render_markdown(consolidated))

    print(f"Generated dictionary markdown at {output_file}")
# Script entry point: build the dictionary only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    generate_dictionary()