"""
Swahili noun class (ngeli) tracking module.

This module provides utilities for tracking and analyzing Swahili noun classes,
which is crucial for understanding agreement patterns and gender marking in Swahili.

Swahili has 18 noun classes organized into pairs:
- 1/2 (m-wa): People, animate beings (mtu/watu)
- 3/4 (m-mi): Plants, body parts (mti/miti)
- 5/6 (ji-ma): Fruits, paired items (jiwe/mawe)
- 7/8 (ki-vi): Things, diminutives (kitu/vitu)
- 9/10 (n-n): Animals, loanwords (ndege/ndege)
- 11/10 (u-n): Abstract nouns, long or extended objects (ukuta/kuta)
- 15 (ku-): Infinitives (kukimbia)
- 16/17/18 (pa-ku-mu): Locatives (mahali)
"""

from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional


class NounClass(Enum):
    """Swahili noun classes (ngeli).

    Each member's value is a string holding the class number(s): paired
    classes are written "singular/plural" (e.g. "1/2"), the remaining
    members carry a single class number (e.g. "15").
    """
    M_WA = "1/2"  # People, animate (mwalimu/walimu)
    M_MI = "3/4"  # Plants, natural objects (mti/miti)
    JI_MA = "5/6"  # Fruits, paired items (jiwe/mawe)
    KI_VI = "7/8"  # Things, diminutives (kitu/vitu)
    N_N = "9/10"   # Animals, loanwords (ndege/ndege)
    U_N = "11/10"  # Abstract nouns and other u- nouns (ukuta/kuta, wall/walls)
    KU = "15"      # Infinitives (kukimbia)
    PA = "16"      # Locative (specific place)
    KU_LOC = "17"  # Locative (general)
    MU_LOC = "18"  # Locative (inside)
    MA = "6"       # Plural-only ma- nouns (maji - water); distinct member from JI_MA


@dataclass
class NounClassInfo:
    """Descriptive record for one noun class.

    The prefix and agreement fields are plain display strings; in this file
    they hold comma-separated prefix lists such as "m-, mw-, mu-".
    """
    # The class this record describes.
    noun_class: NounClass
    # Which grammatical numbers the class covers; this file uses "sg/pl" and "pl".
    number: str
    # Singular noun prefixes, comma-separated; empty string when there is none.
    prefix_singular: str
    # Plural noun prefixes, comma-separated.
    prefix_plural: str
    # Free-text summary of subject and possessive agreement markers.
    agreement_pattern: str
    # Example nouns, typically written "singular/plural".
    examples: List[str]


class NgeliTracker:
    """
    Tracks Swahili noun classes and agreement patterns.

    This class provides utilities for:
    - Identifying a noun's class from its spelling (prefix heuristic)
    - Looking up the expected subject agreement prefix for a noun
    - Recording nouns and counting them per class
    - Scanning free text for likely m-wa (person) nouns

    Classification looks only at the written prefix, so results are
    approximate: any word with a matching prefix is classified, whether or
    not it is actually a noun of that class.
    """

    # Noun class patterns
    NOUN_CLASS_PATTERNS = {
        NounClass.M_WA: NounClassInfo(
            noun_class=NounClass.M_WA,
            number="sg/pl",
            prefix_singular="m-, mw-, mu-",
            prefix_plural="wa-, w-",
            agreement_pattern="a-/wa- (subject), -ake/-ao (possessive)",
            examples=["mwalimu/walimu", "mtu/watu", "mkulima/wakulima"]
        ),
        NounClass.M_MI: NounClassInfo(
            noun_class=NounClass.M_MI,
            number="sg/pl",
            prefix_singular="m-, mw-",
            prefix_plural="mi-",
            agreement_pattern="u-/i- (subject), -ake/-ao (possessive)",
            examples=["mti/miti", "mkono/mikono"]
        ),
        NounClass.JI_MA: NounClassInfo(
            noun_class=NounClass.JI_MA,
            number="sg/pl",
            prefix_singular="ji-, j-, ø-",
            prefix_plural="ma-",
            agreement_pattern="li-/ya- (subject), -ake/-ao (possessive)",
            examples=["jiwe/mawe", "gari/magari"]
        ),
        NounClass.KI_VI: NounClassInfo(
            noun_class=NounClass.KI_VI,
            number="sg/pl",
            prefix_singular="ki-, ch-",
            prefix_plural="vi-, vy-",
            agreement_pattern="ki-/vi- (subject), -ake/-ao (possessive)",
            examples=["kitu/vitu", "kitabu/vitabu"]
        ),
        NounClass.N_N: NounClassInfo(
            noun_class=NounClass.N_N,
            number="sg/pl",
            prefix_singular="n-, ny-, m-, ø-",
            prefix_plural="n-, ny-, m-, ø-",
            agreement_pattern="i-/zi- (subject), -ake/-ao (possessive)",
            examples=["ndege/ndege", "nyumba/nyumba"]
        ),
        NounClass.MA: NounClassInfo(
            noun_class=NounClass.MA,
            number="pl",
            prefix_singular="",
            prefix_plural="ma-",
            agreement_pattern="ya- (subject), -ao (possessive)",
            examples=["maji (water)", "maziwa (milk)"]
        ),
    }

    # M-wa class prefixes (people/occupations - most relevant for gender bias)
    M_WA_PREFIXES = {
        'singular': ['m', 'mw', 'mu'],
        'plural': ['wa', 'w']
    }

    # Possessive pronoun patterns by class.
    # Class 1 and class 2 nouns both take the w- possessive concord; the stem
    # (-ake, -ako, -angu, -etu, -enu, -ao) follows the possessor, not the
    # number of the possessed noun, so both lists hold the same six forms.
    POSSESSIVE_PATTERNS = {
        NounClass.M_WA: {
            'singular': ['wake', 'wako', 'wangu', 'wetu', 'wenu', 'wao'],
            'plural': ['wake', 'wako', 'wangu', 'wetu', 'wenu', 'wao']
        },
        # Add other classes as needed
    }

    # Substrings that mark an m- initial word as a person/occupation noun.
    # This heuristic can be improved with corpus analysis.
    _PERSON_MARKERS = ('limu', 'kulima', 'andishi', 'fanya')

    # Subject agreement prefixes per class as (singular, plural).
    _SUBJECT_AGREEMENT = {
        NounClass.M_WA: ('a-', 'wa-'),
        NounClass.M_MI: ('u-', 'i-'),
        NounClass.JI_MA: ('li-', 'ya-'),
        NounClass.KI_VI: ('ki-', 'vi-'),
        NounClass.N_N: ('i-', 'zi-'),
    }

    def __init__(self) -> None:
        """Initialize ngeli tracker with an empty noun -> class mapping."""
        self.tracked_nouns: Dict[str, NounClass] = {}

    def identify_class(self, noun: str) -> Optional[NounClass]:
        """
        Identify noun class from prefix.

        The noun is lower-cased and stripped of surrounding whitespace, then
        matched against the prefix rules below in order; the first rule that
        matches wins.

        Args:
            noun: Swahili noun to analyze

        Returns:
            NounClass if identifiable, None otherwise
        """
        noun_lower = noun.lower().strip()

        # M-wa class (people) - most important for bias detection.
        # A bare m- prefix is shared with other classes, so it only counts
        # as m-wa when the word also contains a known person marker.
        if noun_lower.startswith(tuple(self.M_WA_PREFIXES['singular'])):
            if any(marker in noun_lower for marker in self._PERSON_MARKERS):
                return NounClass.M_WA

        # Wa-/w- prefix indicates plural m-wa class
        if noun_lower.startswith(tuple(self.M_WA_PREFIXES['plural'])):
            return NounClass.M_WA

        # Mi- prefix (class 4, plural of m-mi)
        if noun_lower.startswith('mi'):
            return NounClass.M_MI

        # Ma- prefix (class 6 plural or class 5/6)
        if noun_lower.startswith('ma'):
            return NounClass.JI_MA

        # Ki-/ch- (class 7) and vi-/vy- (class 8)
        if noun_lower.startswith(('ki', 'ch', 'vi', 'vy')):
            return NounClass.KI_VI

        # N- prefix (class 9/10); this also covers ny-
        if noun_lower.startswith('n'):
            return NounClass.N_N

        return None

    def is_m_wa_class(self, noun: str) -> bool:
        """
        Check if noun belongs to m-wa class (people).

        This is the most important class for gender bias detection
        as it includes occupation and role nouns.

        Args:
            noun: Swahili noun to check

        Returns:
            True if identify_class() classifies the noun as m-wa
        """
        return self.identify_class(noun) == NounClass.M_WA

    def get_expected_agreement(self, noun: str, number: str = "sg") -> Optional[str]:
        """
        Get expected subject agreement prefix for a noun.

        Args:
            noun: Swahili noun
            number: 'sg' for singular; any other value is treated as plural

        Returns:
            Expected agreement prefix (e.g., 'a-' for m-wa singular), or
            None when the noun's class is unknown or has no entry in the
            agreement table
        """
        forms = self._SUBJECT_AGREEMENT.get(self.identify_class(noun))
        if forms is None:
            return None

        singular, plural = forms
        return singular if number == 'sg' else plural

    def track_noun(self, noun: str, noun_class: Optional[NounClass] = None) -> None:
        """
        Track a noun and its class.

        The noun is stored under the exact string passed in. Nouns whose
        class cannot be determined are silently skipped.

        Args:
            noun: Swahili noun to track
            noun_class: Optional explicit class (auto-detected if not provided)
        """
        if noun_class is None:
            noun_class = self.identify_class(noun)

        if noun_class is not None:
            self.tracked_nouns[noun] = noun_class

    def get_statistics(self) -> Dict[str, int]:
        """
        Get statistics on tracked nouns by class.

        Returns:
            Dictionary mapping class values (e.g. "1/2") to the number of
            tracked nouns in that class
        """
        stats: Dict[str, int] = {}
        for noun_class in self.tracked_nouns.values():
            class_name = noun_class.value
            stats[class_name] = stats.get(class_name, 0) + 1

        return stats

    def analyze_text(self, text: str) -> Dict[str, Any]:
        """
        Analyze text for noun class patterns.

        The text is split on whitespace; each token has leading/trailing
        '.,!?;:' removed and is skipped if fewer than 3 characters remain.
        Every remaining token that identify_class() can classify is counted
        as a noun.

        Args:
            text: Swahili text to analyze

        Returns:
            Dictionary with:
            - 'm_wa_nouns': list of tokens classified as m-wa
            - 'm_wa_count': length of 'm_wa_nouns'
            - 'other_nouns': list of (token, class value) tuples
            - 'total_nouns': combined count of both lists
        """
        m_wa_nouns = []
        other_nouns = []

        for word in text.split():
            # Remove punctuation
            word_clean = word.strip('.,!?;:')
            if len(word_clean) < 3:
                continue

            noun_class = self.identify_class(word_clean)
            if noun_class == NounClass.M_WA:
                m_wa_nouns.append(word_clean)
            elif noun_class is not None:
                other_nouns.append((word_clean, noun_class.value))

        return {
            'm_wa_nouns': m_wa_nouns,
            'm_wa_count': len(m_wa_nouns),
            'other_nouns': other_nouns,
            'total_nouns': len(m_wa_nouns) + len(other_nouns)
        }


def get_noun_class_info(noun_class: NounClass) -> Optional[NounClassInfo]:
    """
    Get detailed information about a noun class.

    Args:
        noun_class: NounClass enum value

    Returns:
        NounClassInfo with patterns and examples, or None when
        NgeliTracker.NOUN_CLASS_PATTERNS has no entry for the class
    """
    # The table is a class attribute, so no tracker instance is needed.
    return NgeliTracker.NOUN_CLASS_PATTERNS.get(noun_class)