Spaces:
Running on Zero
Running on Zero
| """ | |
| Phoneme n-gram index: dataclass and cached loader. | |
| """ | |
| import pickle | |
| from dataclasses import dataclass | |
| from typing import Dict, List, Optional, Tuple | |
| from config import NGRAM_INDEX_PATH | |
| class PhonemeNgramIndex: | |
| """Pre-computed n-gram index for the entire Quran.""" | |
| # n-gram -> list of (surah, ayah) positions where it occurs | |
| ngram_positions: Dict[Tuple[str, ...], List[Tuple[int, int]]] | |
| # n-gram -> total occurrence count (for rarity weighting) | |
| ngram_counts: Dict[Tuple[str, ...], int] | |
| # Metadata | |
| ngram_size: int | |
| total_ngrams: int | |
| _INDEX: Optional[PhonemeNgramIndex] = None | |
| def get_ngram_index() -> PhonemeNgramIndex: | |
| """Get or load the phoneme n-gram index.""" | |
| global _INDEX | |
| if _INDEX is None: | |
| print(f"[NGRAM] Loading index from {NGRAM_INDEX_PATH}...") | |
| with open(NGRAM_INDEX_PATH, "rb") as f: | |
| _INDEX = pickle.load(f) | |
| print(f"[NGRAM] Loaded: {len(_INDEX.ngram_positions)} unique {_INDEX.ngram_size}-grams, " | |
| f"{_INDEX.total_ngrams} total occurrences") | |
| return _INDEX | |