#!/usr/bin/env python3
"""
TajweedSST - Step 1: Tajweed Rule Parser

Generates two parallel text streams and a Rule Map:
- Visual Stream: Standard Uthmani text
- Phonetic Stream: Pronounced text for MFA
- Tajweed Map: Tags for physics validation

Tajweed Rules Implemented:
- Idgham (Assimilation)
- Iqlab (Conversion)
- Ikhfa (Concealment)
- Qalqalah (Bounce)
- Ghunnah (Nasalization)
- Madd (Elongation)
- Tafkheem/Tarqeeq (Heavy/Light)
"""

import re
import unicodedata
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Tuple

class TajweedType(Enum):
    """Tajweed rule attached to a single letter.

    Each letter tag stores exactly one of these values; ``NONE`` is the
    default for a letter no rule applies to.
    """
    NONE = "None"
    # Qalqalah (bounce), in two variants
    QALQALAH_SUGHRA = "Qalqalah_Sughra"
    QALQALAH_KUBRA = "Qalqalah_Kubra"
    # Ghunnah (nasalization)
    GHUNNAH = "Ghunnah"
    # Idgham (assimilation), Iqlab (conversion), Ikhfa (concealment)
    IDGHAM_FULL = "Idgham_Full"
    IDGHAM_PARTIAL = "Idgham_Partial"
    IQLAB = "Iqlab"
    IKHFA = "Ikhfa"
    # Madd (elongation); see LetterTag.madd_count for the associated counts
    MADD_ASLI = "Madd_Asli"
    MADD_WAJIB = "Madd_Wajib"
    MADD_LAZIM = "Madd_Lazim"
    # Tafkheem / Tarqeeq (heavy / light articulation)
    TAFKHEEM = "Tafkheem"
    TARQEEQ = "Tarqeeq"
    # Letter that is written but not pronounced
    SILENT = "Silent"

class PhysicsCheck(Enum):
    """Acoustic check requested for a tagged letter.

    The parser only records which check applies; the checks themselves are
    not implemented in this module.
    """
    # Set together with the Qalqalah tags
    CHECK_RMS_BOUNCE = "Check_RMS_Bounce"
    # Set together with the Madd and Idgham tags
    CHECK_DURATION = "Check_Duration"
    # Set together with the Ghunnah, Iqlab and Ikhfa tags
    CHECK_GHUNNAH = "Check_Ghunnah"
    # Set together with the Tafkheem tag
    CHECK_FORMANT_F2 = "Check_Formant_F2"
    # Default: no check requested
    NONE = "None"

@dataclass
class LetterTag:
    """Tajweed information for one base letter of a word."""
    # The letter as written in the input text (diacritics excluded)
    char_visual: str
    # Symbol emitted into the phonetic stream; the parser may override the
    # default mapping (e.g. 'm' for Iqlab, '' for a silent letter)
    char_phonetic: str
    # Index of the letter within its word's list of base letters
    position: int
    tajweed_type: TajweedType = TajweedType.NONE
    physics_check: PhysicsCheck = PhysicsCheck.NONE
    # True when the letter is written but not pronounced
    is_silent: bool = False
    # Elongation length for Madd letters: 0=none, 2=asli, 4=wajib, 6=lazim
    madd_count: int = 0

@dataclass
class WordTags:
    """Tajweed tags for one whitespace-delimited word."""
    # The word exactly as it appeared in the input, diacritics included
    word_text: str
    # One LetterTag per base letter, in reading order
    letters: List[LetterTag] = field(default_factory=list)
    # Space-separated phonetic symbols of the pronounced letters
    phonetic_stream: str = ""

class TajweedParser:
    """Parse vocalised Uthmani Quran text into per-letter Tajweed tags.

    ``parse_text`` is the entry point: it splits the input on whitespace,
    tags every letter of every word, and then applies the noon sakinah /
    tanween rules that depend on the first letter of the following word.

    Only one Tajweed tag is stored per letter. When several rules match the
    same letter, the rule applied last wins (see ``_analyze_letter``).
    """

    # Qalqalah letters: ق ط ب ج د
    QALQALAH_LETTERS = set('قطبجد')

    # Heavy letters (Tafkheem): خ ص ض غ ط ق ظ
    TAFKHEEM_LETTERS = set('خصضغطقظ')

    # Idgham letters after Nun Sakinah: ي ر م ل و ن
    IDGHAM_LETTERS = set('يرملون')
    IDGHAM_WITH_GHUNNAH = set('ينمو')  # With Ghunnah
    IDGHAM_WITHOUT_GHUNNAH = set('رل')  # Without Ghunnah

    # Ikhfa letters (15 letters)
    IKHFA_LETTERS = set('تثجدذزسشصضطظفقك')

    # Harakat (vowel marks)
    FATHA = '\u064E'
    DAMMA = '\u064F'
    KASRA = '\u0650'
    SUKUN = '\u0652'
    SHADDA = '\u0651'
    TANWEEN_FATH = '\u064B'
    TANWEEN_DAMM = '\u064C'
    TANWEEN_KASR = '\u064D'

    # Madd letters
    MADD_ALIF = 'ا'
    MADD_WAW = 'و'
    MADD_YA = 'ي'

    # Romanisation of each base letter (one entry per letter, with diacritics)
    PHONETIC_MAP = {
        'ا': 'ā', 'ب': 'b', 'ت': 't', 'ث': 'ṯ', 'ج': 'j', 'ح': 'ḥ',
        'خ': 'ḫ', 'د': 'd', 'ذ': 'ḏ', 'ر': 'r', 'ز': 'z', 'س': 's',
        'ش': 'š', 'ص': 'ṣ', 'ض': 'ḍ', 'ط': 'ṭ', 'ظ': 'ẓ', 'ع': 'ʿ',
        'غ': 'ġ', 'ف': 'f', 'ق': 'q', 'ك': 'k', 'ل': 'l', 'م': 'm',
        'ن': 'n', 'ه': 'h', 'و': 'w', 'ي': 'y', 'ء': 'ʾ', 'ة': 'h',
        'ى': 'ā', 'ئ': 'ʾ', 'ؤ': 'ʾ', 'أ': 'ʾa', 'إ': 'ʾi', 'آ': 'ʾā'
    }

    # --- private lookup tables -------------------------------------------
    _TANWEEN_MARKS = (TANWEEN_FATH, TANWEEN_DAMM, TANWEEN_KASR)
    # Marks that make the letter carrying them vowelled (i.e. not sakin)
    _VOWEL_MARKS = (FATHA, DAMMA, KASRA, SHADDA) + _TANWEEN_MARKS
    # Short vowel that must precede each madd letter
    _MADD_VOWEL = {MADD_ALIF: FATHA, MADD_WAW: DAMMA, MADD_YA: KASRA}
    # Letter forms that realise a hamza (a following hamza makes Madd Wajib)
    _HAMZA_LETTERS = frozenset('ءأإؤئ')
    # U+06DF ARABIC SMALL HIGH ROUNDED ZERO: Uthmani text may write it over
    # the unpronounced alif, so it must not stop that alif from being
    # recognised as silent.
    _SILENT_SIGN = '\u06DF'

    def __init__(self):
        self.debug = False

    def parse_text(self, text: str) -> "List[WordTags]":
        """Tag every word of *text* and return the tags in input order.

        Words are separated by any run of whitespace; empty or blank input
        yields an empty list.
        """
        tagged_words = [self._parse_word(word) for word in text.split()]

        # Noon sakinah / tanween rules depend on the next word's first letter
        self._analyze_cross_word_rules(tagged_words)

        return tagged_words

    def _parse_word(self, word: str) -> "WordTags":
        """Tag each base letter of *word* and build its phonetic stream."""
        word_tags = WordTags(word_text=word)
        letters_with_harakat = self._split_letters(word)

        for idx, (letter, harakat) in enumerate(letters_with_harakat):
            word_tags.letters.append(
                self._analyze_letter(
                    letter=letter,
                    harakat=harakat,
                    position=idx,
                    context=(letters_with_harakat, idx),
                    word=word,
                )
            )

        word_tags.phonetic_stream = self._generate_phonetic_stream(word_tags.letters)
        return word_tags

    @staticmethod
    def _is_diacritic(char: str) -> bool:
        """Return True for a non-spacing combining mark.

        Unicode general category ``Mn`` covers the harakat, the superscript
        alef and the Quranic annotation signs, so none of them is mistaken
        for a base letter.
        """
        return unicodedata.category(char) == 'Mn'

    def _carries_vowel(self, marks: str) -> bool:
        """Return True if *marks* contain a short vowel, tanween or shadda."""
        return any(mark in marks for mark in self._VOWEL_MARKS)

    def _split_letters(self, word: str) -> List[Tuple[str, str]]:
        """Split *word* into ``(base letter, marks)`` pairs.

        ``marks`` is the (possibly empty) run of combining marks that directly
        follows the base letter, in input order. Marks that appear before any
        base letter have nothing to attach to and are dropped.
        """
        pairs: List[Tuple[str, str]] = []
        length = len(word)
        i = 0

        while i < length:
            if self._is_diacritic(word[i]):
                # Orphan mark at the start of the word
                i += 1
                continue

            j = i + 1
            while j < length and self._is_diacritic(word[j]):
                j += 1

            pairs.append((word[i], word[i + 1:j]))
            i = j

        return pairs

    def _noon_rule(self, next_letter: str,
                   allow_idgham: bool) -> "Optional[Tuple[TajweedType, PhysicsCheck]]":
        """Return the rule a noon sound takes before *next_letter*.

        Returns ``(tajweed_type, physics_check)`` or ``None`` when the noon
        is pronounced clearly. *allow_idgham* must be False inside a word:
        Idgham only applies across a word boundary.
        """
        if next_letter == 'ب':
            return TajweedType.IQLAB, PhysicsCheck.CHECK_GHUNNAH
        if allow_idgham and next_letter in self.IDGHAM_LETTERS:
            if next_letter in self.IDGHAM_WITH_GHUNNAH:
                return TajweedType.IDGHAM_PARTIAL, PhysicsCheck.CHECK_DURATION
            return TajweedType.IDGHAM_FULL, PhysicsCheck.CHECK_DURATION
        if next_letter in self.IKHFA_LETTERS:
            return TajweedType.IKHFA, PhysicsCheck.CHECK_GHUNNAH
        return None

    def _classify_madd(self, letters_list: List[Tuple[str, str]],
                       idx: int) -> "Tuple[TajweedType, int]":
        """Return ``(madd type, count)`` for the madd letter at *idx*.

        Followed by sukun/shadda -> Lazim (6); followed by a hamza in the
        same word -> Wajib (4); otherwise, including word-final -> Asli (2).
        """
        if idx + 1 < len(letters_list):
            next_letter, next_marks = letters_list[idx + 1]
            if self.SHADDA in next_marks or self.SUKUN in next_marks:
                return TajweedType.MADD_LAZIM, 6
            if next_letter in self._HAMZA_LETTERS:
                return TajweedType.MADD_WAJIB, 4
        return TajweedType.MADD_ASLI, 2

    def _analyze_letter(self, letter: str, harakat: str, position: int,
                        context: Tuple[List[Tuple[str, str]], int],
                        word: str) -> "LetterTag":
        """Build the tag for one letter from its marks and its neighbours.

        Args:
            letter: the base letter.
            harakat: combining marks attached to *letter*.
            position: value stored in ``LetterTag.position``.
            context: ``(pairs, idx)`` - the word's ``(letter, marks)`` pairs
                and the index of *letter* in that list.
            word: the full word; kept for interface compatibility, unused.

        Qalqalah, Tafkheem and Madd are mutually exclusive and tested in that
        order. Ghunnah, the in-word noon/tanween rules and the silent-alif
        rule are applied afterwards and overwrite an earlier tag.
        """
        letters_list, idx = context
        has_next = idx + 1 < len(letters_list)
        is_last = idx == len(letters_list) - 1
        has_sukun = self.SUKUN in harakat
        has_shadda = self.SHADDA in harakat

        tag = LetterTag(
            char_visual=letter,
            char_phonetic=self.PHONETIC_MAP.get(letter, letter),
            position=position,
        )

        # Rule 1: Qalqalah (ق ط ب ج د with sukun, or word-final)
        if letter in self.QALQALAH_LETTERS and (has_sukun or is_last):
            tag.tajweed_type = (TajweedType.QALQALAH_KUBRA if is_last
                                else TajweedType.QALQALAH_SUGHRA)
            tag.physics_check = PhysicsCheck.CHECK_RMS_BOUNCE

        # Rule 2: Tafkheem (heavy letters)
        elif letter in self.TAFKHEEM_LETTERS:
            tag.tajweed_type = TajweedType.TAFKHEEM
            tag.physics_check = PhysicsCheck.CHECK_FORMANT_F2

        # Rule 3: Madd - an unvowelled ا/و/ي after its matching short vowel.
        # A waw/ya that carries its own vowel (e.g. the waw of هُوَ) is a
        # consonant, not an elongation.
        elif (letter in self._MADD_VOWEL and idx > 0
              and not self._carries_vowel(harakat)
              and self._MADD_VOWEL[letter] in letters_list[idx - 1][1]):
            tag.tajweed_type, tag.madd_count = self._classify_madd(letters_list, idx)
            tag.physics_check = PhysicsCheck.CHECK_DURATION

        # Rule 4: Ghunnah (noon / meem with shadda)
        if letter in ('ن', 'م') and has_shadda:
            tag.tajweed_type = TajweedType.GHUNNAH
            tag.physics_check = PhysicsCheck.CHECK_GHUNNAH

        # Rule 5: noon sakinah / tanween followed by a letter of the SAME
        # word. Idgham is deliberately excluded here (دُنْيَا, بُنْيَان, ...).
        if has_next:
            next_letter = letters_list[idx + 1][0]
            if letter == 'ن' and has_sukun:
                rule = self._noon_rule(next_letter, allow_idgham=False)
                if rule is not None:
                    tag.tajweed_type, tag.physics_check = rule
                    if tag.tajweed_type is TajweedType.IQLAB:
                        tag.char_phonetic = 'm'  # pronounced as meem
            if any(mark in harakat for mark in self._TANWEEN_MARKS):
                rule = self._noon_rule(next_letter, allow_idgham=False)
                if rule is not None:
                    tag.tajweed_type, tag.physics_check = rule

        # Silent alif after waw (e.g. waw al-jama'a)
        if letter == 'ا' and idx > 0 and not harakat.replace(self._SILENT_SIGN, ''):
            prev_letter, prev_harakat = letters_list[idx - 1]
            if prev_letter == 'و' and (self.DAMMA in prev_harakat
                                       or self.SUKUN in prev_harakat):
                tag.is_silent = True
                tag.tajweed_type = TajweedType.SILENT
                tag.char_phonetic = ''

        return tag

    def _final_noon_carrier(self, word: "WordTags") -> "Optional[LetterTag]":
        """Return the tag of the letter carrying a word-final noon sound.

        That is either the letter bearing tanween (possibly followed by the
        orthographic alif / alif maqsura of tanween fath) or a final ن that
        has no vowel and no tag yet. Returns ``None`` if the word does not
        end in a noon sound.
        """
        pairs = self._split_letters(word.word_text)
        if len(pairs) != len(word.letters):
            # Tags were not derived from word_text, so the marks are unknown;
            # fall back to judging by the letters alone.
            pairs = [(tag.char_visual, '') for tag in word.letters]

        last = len(pairs) - 1
        letter, marks = pairs[last]

        if any(mark in marks for mark in self._TANWEEN_MARKS):
            return word.letters[last]
        if (letter in ('ا', 'ى') and last > 0
                and not self._carries_vowel(marks)
                and self.TANWEEN_FATH in pairs[last - 1][1]):
            return word.letters[last - 1]
        if (letter == 'ن' and not self._carries_vowel(marks)
                and word.letters[last].tajweed_type == TajweedType.NONE):
            return word.letters[last]
        return None

    def _analyze_cross_word_rules(self, words: "List[WordTags]") -> None:
        """Apply noon sakinah / tanween rules across word boundaries.

        Mutates the tags in place. A final ن is only considered when it is
        unvowelled and still untagged; a tanween rule overwrites whatever
        tag its carrier letter already had.
        """
        for current_word, next_word in zip(words, words[1:]):
            if not current_word.letters or not next_word.letters:
                continue

            carrier = self._final_noon_carrier(current_word)
            if carrier is None:
                continue

            rule = self._noon_rule(next_word.letters[0].char_visual,
                                   allow_idgham=True)
            if rule is None:
                continue

            carrier.tajweed_type, carrier.physics_check = rule
            # Only a written noon has a phoneme to turn into meem; tanween
            # is not part of the phonetic stream.
            if carrier.tajweed_type is TajweedType.IQLAB and carrier.char_visual == 'ن':
                carrier.char_phonetic = 'm'

    def _generate_phonetic_stream(self, letters: "List[LetterTag]") -> str:
        """Join the phonetic symbols of all pronounced letters with spaces."""
        return ' '.join(
            letter.char_phonetic
            for letter in letters
            if not letter.is_silent and letter.char_phonetic
        )


def main():
    """Run the parser on a sample verse and print each tagged letter."""
    # Sample input: Surah Al-Ikhlas
    sample = "قُلْ هُوَ اللَّهُ أَحَدٌ"
    banner = "=" * 50

    # One call, one line per argument, followed by a blank line
    print(banner, "TajweedSST Parser Test", banner, f"Input: {sample}", "", sep="\n")

    for tagged_word in TajweedParser().parse_text(sample):
        print(f"Word: {tagged_word.word_text}")
        print(f"  Phonetic: {tagged_word.phonetic_stream}")
        # Only letters that received a rule are listed
        for tag in tagged_word.letters:
            if tag.tajweed_type == TajweedType.NONE:
                continue
            print(f"  [{tag.char_visual}] → {tag.tajweed_type.value} ({tag.physics_check.value})")
        print()


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()