import os
import json
import glob

def generate_dictionary(dataset_dir='../../../data/set5__culture',
                        output_file='../docs/dictionary/dictionary_nanayoni.md'):
    """Merge per-race JSON glossaries into one alphabetised Markdown file.

    Every ``*.json`` file directly inside *dataset_dir* is read, except
    files whose name contains ``multi_race`` (skipped so its terms are not
    counted twice). From each file the ``dictionary`` object is scanned for
    the categories ``anatomy``, ``biology``, ``customs`` and
    ``vulgar_slang``; each term is recorded with its meaning and the set of
    races it was found under.

    When the same term appears in several files, the meaning from the first
    file (in sorted path order) is kept; later files only contribute their
    race name.

    Args:
        dataset_dir: Directory holding the per-race JSON files. A relative
            path is resolved against the current working directory.
        output_file: Path of the Markdown file to write (overwritten if it
            exists). Its parent directory must already exist.

    Raises:
        json.JSONDecodeError: If a dataset file is not valid JSON.
        OSError: If a dataset file cannot be read or the output file cannot
            be written.
    """
    categories = ('anatomy', 'biology', 'customs', 'vulgar_slang')

    # term -> {'meaning': first meaning seen, 'races': set of race names}
    consolidated_dict = {}

    # Escape the directory so glob metacharacters in it are taken literally,
    # and sort the matches: glob's own ordering depends on the filesystem,
    # which would make the "first meaning wins" rule non-reproducible.
    pattern = os.path.join(glob.escape(dataset_dir), '*.json')
    for filepath in sorted(glob.glob(pattern)):
        filename = os.path.basename(filepath)

        # Test the file name only; matching against the whole path would
        # skip every file if a parent directory contained 'multi_race'.
        if 'multi_race' in filename:
            continue

        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Fall back to the capitalised file stem when 'race' is missing,
        # null or empty. `or` (rather than a .get default) also covers an
        # explicit JSON null.
        race_name = data.get('race') or filename.split('.')[0].capitalize()
        dictionary = data.get('dictionary') or {}

        for category in categories:
            terms = dictionary.get(category) or {}
            for term, meaning in terms.items():
                # setdefault keeps the first meaning encountered for a term.
                entry = consolidated_dict.setdefault(
                    term, {'meaning': meaning, 'races': set()}
                )
                entry['races'].add(race_name)

    markdown_lines = [
        "# “नानायोनि-कामभेद-संग्रहः” (Nānāyoni-Kāmabheda-Saṅgrahaḥ)",
        "## “A Compendium of Erotic Variations Across Many Forms”",
        "**Authored by नग्नाक्षी (Nagnākṣī)**",
        "",
        "---",
        ""
    ]

    # One paragraph per term, alphabetically, each followed by a blank line.
    for term in sorted(consolidated_dict):
        entry = consolidated_dict[term]
        races_str = ", ".join(sorted(entry['races']))
        markdown_lines.append(
            f"**{term}**:{entry['meaning']} Applies to: {races_str}."
        )
        markdown_lines.append("")

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("\n".join(markdown_lines))

    print(f"Generated dictionary markdown at {output_file}")

# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    generate_dictionary()