Spaces:

hetchyy
/

Quran-multi-aligner

Running on Zero

Initial commit

20e9692 30 days ago

1.09 kB

	"""
	Phoneme n-gram index: dataclass and cached loader.
	"""

	import pickle
	from dataclasses import dataclass
	from typing import Dict, List, Optional, Tuple

	from config import NGRAM_INDEX_PATH


	@dataclass
	class PhonemeNgramIndex:
	    """Pre-computed phoneme n-gram index for the entire Quran.

	    Attributes:
	        ngram_positions: Maps each n-gram (a tuple of strings) to the list
	            of ``(surah, ayah)`` positions where it occurs.
	        ngram_counts: Maps each n-gram to its total occurrence count
	            (used for rarity weighting).
	        ngram_size: The ``n`` of the n-grams stored in this index.
	        total_ngrams: Total number of n-gram occurrences in the index.
	    """

	    # n-gram -> list of (surah, ayah) positions where it occurs
	    ngram_positions: Dict[Tuple[str, ...], List[Tuple[int, int]]]

	    # n-gram -> total occurrence count (for rarity weighting)
	    ngram_counts: Dict[Tuple[str, ...], int]

	    # Metadata
	    ngram_size: int
	    total_ngrams: int


	# Module-level cache for the loaded index; stays None until the first
	# get_ngram_index() call unpickles it from NGRAM_INDEX_PATH.
	_INDEX: Optional[PhonemeNgramIndex] = None


	def get_ngram_index() -> PhonemeNgramIndex:
	    """Return the phoneme n-gram index, loading it from disk on first use.

	    The index is unpickled from ``NGRAM_INDEX_PATH`` the first time this
	    function is called and cached in the module-level ``_INDEX``; later
	    calls return the cached object without touching the file.

	    Returns:
	        The cached index object.

	    Raises:
	        OSError: If ``NGRAM_INDEX_PATH`` cannot be opened for reading.
	        Exception: Any error raised by ``pickle.load`` or by reading the
	            loaded object's attributes propagates unchanged; in that case
	            nothing is cached and the next call retries the load.
	    """
	    global _INDEX
	    if _INDEX is not None:
	        return _INDEX

	    print(f"[NGRAM] Loading index from {NGRAM_INDEX_PATH}...")
	    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
	    # Only point NGRAM_INDEX_PATH at an index file you produced or trust.
	    with open(NGRAM_INDEX_PATH, "rb") as f:
	        index = pickle.load(f)

	    # Read the attributes before publishing to the global cache, so that an
	    # incompatible pickle fails on every call instead of being cached and
	    # silently returned after the first failure.
	    print(f"[NGRAM] Loaded: {len(index.ngram_positions)} unique {index.ngram_size}-grams, "
	          f"{index.total_ngrams} total occurrences")
	    _INDEX = index
	    return index