Spaces:

hetchyy
/

Quran-multi-aligner

Running on Zero

App Files Files Community

Quran-multi-aligner / scripts /add_open_tanween.py

hetchyy

Initial commit

20e9692 6 days ago

raw

history blame contribute delete

2.06 kB

	"""Replace regular tanween in qpc_hafs.json with open tanween where digital_khatt uses them."""

	import json
	from pathlib import Path

	# Data directory: "data" under the parent of the directory holding this script.
	DATA_DIR = Path(__file__).resolve().parent.parent.joinpath("data")

	# Open tanween code point -> regular tanween code point, in a fixed order.
	OPEN_TO_REGULAR = dict(
	    (
	        ("\u08F0", "\u064B"),  # open fathatan → regular fathatan
	        ("\u08F1", "\u064C"),  # open dammatan → regular dammatan
	        ("\u08F2", "\u064D"),  # open kasratan → regular kasratan
	    )
	)
	# Inverse lookup (regular -> open), same ordering as OPEN_TO_REGULAR.
	REGULAR_TO_OPEN = dict(zip(OPEN_TO_REGULAR.values(), OPEN_TO_REGULAR))

	def main():
	    """Rewrite tanween marks in ``qpc_hafs.json`` to follow digital_khatt.

	    Loads ``digital_khatt_v2_script.json`` and ``qpc_hafs.json`` from
	    ``DATA_DIR``. For every key present in both files, each open tanween
	    code point found in the khatt ``text`` causes every occurrence of the
	    corresponding regular tanween in the qpc ``text`` to be replaced by the
	    open form. A per-mark summary is printed (one count per modified entry,
	    labelled "words"), up to ten mismatches are listed, and
	    ``qpc_hafs.json`` is overwritten in place.

	    A mismatch is an entry where khatt has an open tanween but the qpc text
	    has neither the regular nor the open form of that mark. Entries whose
	    qpc text already carries the open form are not reported, so re-running
	    the script on already-converted data does not list every converted
	    entry as a mismatch.
	    """
	    khatt_path = DATA_DIR / "digital_khatt_v2_script.json"
	    out_path = DATA_DIR / "qpc_hafs.json"
	    khatt = json.loads(khatt_path.read_text("utf-8"))
	    qpc = json.loads(out_path.read_text("utf-8"))

	    # Display names for the summary; built once instead of per report line.
	    names = {"\u08F0": "fathatan", "\u08F1": "dammatan", "\u08F2": "kasratan"}
	    counts = dict.fromkeys(names, 0)
	    mismatches = []

	    for key, khatt_entry in khatt.items():
	        if key not in qpc:
	            continue
	        khatt_text = khatt_entry["text"]
	        original_text = qpc[key]["text"]
	        qpc_text = original_text

	        for open_char, regular_char in OPEN_TO_REGULAR.items():
	            if open_char not in khatt_text:
	                continue
	            if regular_char in qpc_text:
	                qpc_text = qpc_text.replace(regular_char, open_char)
	                counts[open_char] += 1
	            elif open_char not in qpc_text:
	                # Neither form is present: the two sources really disagree.
	                # If the open form is already there, nothing needs doing.
	                mismatches.append((key, open_char, khatt_text, original_text))

	        qpc[key]["text"] = qpc_text

	    print("Replacements:")
	    for char, count in counts.items():
	        print(f" open {names[char]} (U+{ord(char):04X}): {count} words")
	    print(f" total: {sum(counts.values())} words")

	    if mismatches:
	        print(f"\nMismatches ({len(mismatches)}):")
	        # Only the first ten are shown to keep the report short.
	        for key, char, kt, qt in mismatches[:10]:
	            print(f" {key}: khatt has U+{ord(char):04X} but qpc missing regular equivalent")
	            print(f" khatt: {kt}")
	            print(f" qpc: {qt}")

	    out_path.write_text(json.dumps(qpc, ensure_ascii=False, indent=2) + "\n", "utf-8")
	    print(f"\nSaved to {out_path}")


	# Run the conversion only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()