"""Replace regular tanween in qpc_hafs.json with open tanween where digital_khatt uses them.""" import json from pathlib import Path DATA_DIR = Path(__file__).resolve().parent.parent / "data" OPEN_TO_REGULAR = { "\u08F0": "\u064B", # open fathatan → regular fathatan "\u08F1": "\u064C", # open dammatan → regular dammatan "\u08F2": "\u064D", # open kasratan → regular kasratan } REGULAR_TO_OPEN = {v: k for k, v in OPEN_TO_REGULAR.items()} def main(): khatt = json.loads((DATA_DIR / "digital_khatt_v2_script.json").read_text("utf-8")) qpc = json.loads((DATA_DIR / "qpc_hafs.json").read_text("utf-8")) counts = {"\u08F0": 0, "\u08F1": 0, "\u08F2": 0} mismatches = [] for key, khatt_entry in khatt.items(): if key not in qpc: continue khatt_text = khatt_entry["text"] qpc_text = qpc[key]["text"] for open_char, regular_char in OPEN_TO_REGULAR.items(): if open_char in khatt_text: if regular_char in qpc_text: qpc_text = qpc_text.replace(regular_char, open_char) counts[open_char] += 1 else: mismatches.append((key, open_char, khatt_text, qpc[key]["text"])) qpc[key]["text"] = qpc_text print("Replacements:") for char, count in counts.items(): name = {"\u08F0": "fathatan", "\u08F1": "dammatan", "\u08F2": "kasratan"}[char] print(f" open {name} (U+{ord(char):04X}): {count} words") print(f" total: {sum(counts.values())} words") if mismatches: print(f"\nMismatches ({len(mismatches)}):") for key, char, kt, qt in mismatches[:10]: print(f" {key}: khatt has U+{ord(char):04X} but qpc missing regular equivalent") print(f" khatt: {kt}") print(f" qpc: {qt}") out_path = DATA_DIR / "qpc_hafs.json" out_path.write_text(json.dumps(qpc, ensure_ascii=False, indent=2) + "\n", "utf-8") print(f"\nSaved to {out_path}") if __name__ == "__main__": main()