Spaces:
Running
on
Zero
Running
on
Zero
| """Replace regular tanween in qpc_hafs.json with open tanween where digital_khatt uses them.""" | |
| import json | |
| from pathlib import Path | |
# Directory with the JSON inputs: the "data" folder that sits next to the
# directory containing this script.
DATA_DIR = Path(__file__).resolve().parent.parent.joinpath("data")

# Each "open" tanween code point (U+08F0..U+08F2) paired with the regular
# tanween code point (U+064B..U+064D) that it stands in for.
OPEN_TO_REGULAR = dict(
    (
        ("\u08F0", "\u064B"),  # open fathatan → regular fathatan
        ("\u08F1", "\u064C"),  # open dammatan → regular dammatan
        ("\u08F2", "\u064D"),  # open kasratan → regular kasratan
    )
)

# Inverse lookup: regular tanween code point → its open counterpart.
REGULAR_TO_OPEN = dict(zip(OPEN_TO_REGULAR.values(), OPEN_TO_REGULAR.keys()))
def main(data_dir=None) -> None:
    """Rewrite ``qpc_hafs.json`` to use open tanween where digital_khatt does.

    For every key present in both JSON files, each open tanween character
    found in the digital_khatt text causes every occurrence of its regular
    counterpart in the QPC text to be replaced by the open form. A summary
    of the replacements (and up to ten mismatches) is printed, and the
    updated QPC data is written back over ``qpc_hafs.json``.

    A word is reported as a mismatch only when digital_khatt has an open
    tanween and the QPC text has neither the regular nor the open form.
    Words that already carry the open form are left alone, so running the
    script again on its own output does not report them as mismatches.

    Args:
        data_dir: Directory holding ``digital_khatt_v2_script.json`` and
            ``qpc_hafs.json``. When omitted, the module-level ``DATA_DIR``
            is used.
    """
    base_dir = DATA_DIR if data_dir is None else Path(data_dir)
    out_path = base_dir / "qpc_hafs.json"

    khatt = json.loads((base_dir / "digital_khatt_v2_script.json").read_text("utf-8"))
    qpc = json.loads(out_path.read_text("utf-8"))

    # Built once here instead of on every iteration of the report loop.
    names = {"\u08F0": "fathatan", "\u08F1": "dammatan", "\u08F2": "kasratan"}
    counts = dict.fromkeys(names, 0)
    mismatches = []

    for key, khatt_entry in khatt.items():
        if key not in qpc:
            continue
        khatt_text = khatt_entry["text"]
        original_text = qpc[key]["text"]
        qpc_text = original_text
        for open_char, regular_char in OPEN_TO_REGULAR.items():
            if open_char not in khatt_text:
                continue
            if regular_char in qpc_text:
                qpc_text = qpc_text.replace(regular_char, open_char)
                counts[open_char] += 1
            elif open_char not in qpc_text:
                # Neither form is present: a genuine disagreement between
                # the two sources. (If the open form is already there, the
                # word was converted earlier and needs no action.)
                mismatches.append((key, open_char, khatt_text, original_text))
        qpc[key]["text"] = qpc_text

    print("Replacements:")
    for char, count in counts.items():
        print(f" open {names[char]} (U+{ord(char):04X}): {count} words")
    print(f" total: {sum(counts.values())} words")

    if mismatches:
        print(f"\nMismatches ({len(mismatches)}):")
        # Only the first ten are listed to keep the report readable.
        for key, char, kt, qt in mismatches[:10]:
            print(f" {key}: khatt has U+{ord(char):04X} but qpc missing regular equivalent")
            print(f" khatt: {kt}")
            print(f" qpc: {qt}")

    out_path.write_text(json.dumps(qpc, ensure_ascii=False, indent=2) + "\n", "utf-8")
    print(f"\nSaved to {out_path}")
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()