Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,060 Bytes
20e9692 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
"""Replace regular tanween in qpc_hafs.json with open tanween where digital_khatt uses them."""
import json
from pathlib import Path
# Directory holding the JSON word lists: the "data" folder next to this
# script's parent directory.
DATA_DIR = Path(__file__).resolve().parent.parent / "data"

# Open tanween code point -> the regular tanween code point it replaces.
OPEN_TO_REGULAR = {
    "\u08F0": "\u064B",  # open fathatan → regular fathatan
    "\u08F1": "\u064C",  # open dammatan → regular dammatan
    "\u08F2": "\u064D",  # open kasratan → regular kasratan
}
REGULAR_TO_OPEN = {v: k for k, v in OPEN_TO_REGULAR.items()}

# Display names used in the summary report, keyed by open tanween code point.
_OPEN_TANWEEN_NAMES = {
    "\u08F0": "fathatan",
    "\u08F1": "dammatan",
    "\u08F2": "kasratan",
}


def _apply_open_tanween(khatt, qpc):
    """Rewrite ``qpc`` entries in place to use open tanween where ``khatt`` does.

    Both arguments are mappings from a key to a dict with a ``"text"`` string.
    Keys present in ``khatt`` but absent from ``qpc`` are ignored.

    Returns a ``(counts, mismatches)`` pair:

    * ``counts`` maps each open tanween character to the number of entries in
      which the regular form was replaced by it.
    * ``mismatches`` is a list of ``(key, open_char, khatt_text, qpc_text)``
      tuples for entries where ``khatt`` has an open tanween but ``qpc`` has
      neither the regular nor the open form. ``qpc_text`` is the text as it
      was before any replacement in that entry.
    """
    counts = dict.fromkeys(OPEN_TO_REGULAR, 0)
    mismatches = []

    for key, khatt_entry in khatt.items():
        if key not in qpc:
            continue

        khatt_text = khatt_entry["text"]
        original_text = qpc[key]["text"]
        updated_text = original_text

        for open_char, regular_char in OPEN_TO_REGULAR.items():
            if open_char not in khatt_text:
                continue
            if regular_char in updated_text:
                updated_text = updated_text.replace(regular_char, open_char)
                counts[open_char] += 1
            elif open_char not in updated_text:
                # Only a genuine disagreement is reported. If qpc already
                # carries the open form (e.g. the script was run before),
                # there is nothing to do and nothing to complain about.
                mismatches.append((key, open_char, khatt_text, original_text))

        qpc[key]["text"] = updated_text

    return counts, mismatches


def main(data_dir=None):
    """Replace regular tanween in qpc_hafs.json with the open form used by khatt.

    Reads ``digital_khatt_v2_script.json`` and ``qpc_hafs.json`` from
    ``data_dir`` (``DATA_DIR`` when omitted), converts the qpc texts, prints a
    summary of replacements plus up to ten mismatches, and writes the result
    back to ``qpc_hafs.json``. Running it again on already converted data
    leaves the file content unchanged and does not report converted words as
    mismatches.
    """
    data_dir = DATA_DIR if data_dir is None else Path(data_dir)
    out_path = data_dir / "qpc_hafs.json"

    khatt = json.loads((data_dir / "digital_khatt_v2_script.json").read_text("utf-8"))
    qpc = json.loads(out_path.read_text("utf-8"))

    counts, mismatches = _apply_open_tanween(khatt, qpc)

    print("Replacements:")
    for char, count in counts.items():
        name = _OPEN_TANWEEN_NAMES[char]
        print(f" open {name} (U+{ord(char):04X}): {count} words")
    print(f" total: {sum(counts.values())} words")

    if mismatches:
        print(f"\nMismatches ({len(mismatches)}):")
        # Cap the listing so a systematic mismatch does not flood the console.
        for key, char, kt, qt in mismatches[:10]:
            print(f" {key}: khatt has U+{ord(char):04X} but qpc missing regular equivalent")
            print(f" khatt: {kt}")
            print(f" qpc: {qt}")

    out_path.write_text(json.dumps(qpc, ensure_ascii=False, indent=2) + "\n", "utf-8")
    print(f"\nSaved to {out_path}")


if __name__ == "__main__":
    main()
|