hetchyy's picture
Initial commit
20e9692
"""
Phoneme n-gram index: dataclass and cached loader.
"""
import pickle
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
from config import NGRAM_INDEX_PATH
@dataclass
class PhonemeNgramIndex:
    """Precomputed phoneme n-gram lookup tables covering the entire Quran.

    Attributes:
        ngram_positions: Maps each n-gram (a tuple of strings) to the list of
            (surah, ayah) positions where it occurs.
        ngram_counts: Maps each n-gram to its total occurrence count; intended
            for rarity weighting.
        ngram_size: Metadata - the n of the n-grams held in this index.
        total_ngrams: Metadata - total number of n-gram occurrences recorded.
    """

    # NOTE(review): get_ngram_index() unpickles an object annotated as this
    # type, so the class name and field names presumably have to stay
    # compatible with the stored pickle - confirm before renaming anything.
    ngram_positions: Dict[Tuple[str, ...], List[Tuple[int, int]]]
    ngram_counts: Dict[Tuple[str, ...], int]
    ngram_size: int
    total_ngrams: int
# Module-level cache for the loaded index. Stays None until get_ngram_index()
# completes its first pickle load; afterwards every call reuses this object.
_INDEX: Optional[PhonemeNgramIndex] = None
def get_ngram_index() -> PhonemeNgramIndex:
    """Return the phoneme n-gram index, loading it from disk on first use.

    The first call unpickles the file at NGRAM_INDEX_PATH, stores the result
    in the module-level ``_INDEX`` cache and prints two progress lines. Later
    calls return the cached object without touching the filesystem.

    Returns:
        The cached index object.

    Raises:
        Nothing is caught here: any error raised by ``open()`` or
        ``pickle.load()`` propagates to the caller, and the cache is left
        unset in that case.
    """
    global _INDEX

    # Fast path: the index has already been loaded in this process.
    if _INDEX is not None:
        return _INDEX

    print(f"[NGRAM] Loading index from {NGRAM_INDEX_PATH}...")
    # NOTE(review): pickle can execute arbitrary code while loading, so the
    # file at NGRAM_INDEX_PATH must come from a trusted source.
    with open(NGRAM_INDEX_PATH, "rb") as index_file:
        _INDEX = pickle.load(index_file)

    print(f"[NGRAM] Loaded: {len(_INDEX.ngram_positions)} unique {_INDEX.ngram_size}-grams, "
          f"{_INDEX.total_ngrams} total occurrences")
    return _INDEX