Commit ·
cf5dfba
1
Parent(s): 1b3bed7
Readme and test script updates
Browse files- README.md +1 -1
- src/test_t5.py +12 -3
README.md
CHANGED
|
@@ -87,7 +87,7 @@ Example output:
|
|
| 87 |
Input:
|
| 88 |
ܒܡܠܟܘܬܐ ܕܐܠܗܐ
|
| 89 |
|
| 90 |
-
Output (
|
| 91 |
b-malkutho d-aloho
|
| 92 |
```
|
| 93 |
|
|
|
|
| 87 |
Input:
|
| 88 |
ܒܡܠܟܘܬܐ ܕܐܠܗܐ
|
| 89 |
|
| 90 |
+
Output (West):
|
| 91 |
b-malkutho d-aloho
|
| 92 |
```
|
| 93 |
|
src/test_t5.py
CHANGED
|
@@ -1,7 +1,16 @@
|
|
| 1 |
from transformers import AutoTokenizer, T5ForConditionalGeneration, pipeline
|
| 2 |
|
| 3 |
# Path config (relative to project root)
|
| 4 |
-
model_path = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
# Load model and tokeniser
|
| 7 |
print("Loading model and tokeniser...")
|
|
@@ -61,7 +70,7 @@ print("=" * 50)
|
|
| 61 |
for sample in test_samples:
|
| 62 |
result = transliterate(sample["text"], sample["dialect"])
|
| 63 |
print(f"\n{sample['description']}:")
|
| 64 |
-
print(f" Syriac: {sample['text']}")
|
| 65 |
print(f" Latin: {result}")
|
| 66 |
|
| 67 |
print("\n" + "=" * 50)
|
|
@@ -89,4 +98,4 @@ while True:
|
|
| 89 |
if text:
|
| 90 |
result = transliterate(text, dialect)
|
| 91 |
dialect_name = "East" if dialect == "east" else "West"
|
| 92 |
-
print(f" [{dialect_name}] {text} → {result}")
|
|
|
|
| 1 |
from transformers import AutoTokenizer, T5ForConditionalGeneration, pipeline
|
| 2 |
|
| 3 |
# Path config (relative to project root)
|
| 4 |
+
model_path = "crossroderick/aramt5"
|
| 5 |
+
|
| 6 |
+
# Unicode directional formatting for RTL text (Syriac)
|
| 7 |
+
RLI = "\u2067" # Right-to-Left Isolate
|
| 8 |
+
PDI = "\u2069" # Pop Directional Isolate
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def rtl(text: str) -> str:
    """Return *text* enclosed in right-to-left isolate markers.

    The module-level ``RLI`` marker is placed before *text* and ``PDI``
    after it, so that right-to-left script (Syriac here) is displayed
    correctly in a terminal.
    """
    # Empty replacement fields format each value exactly as f-string
    # interpolation would, so the result is RLI + text + PDI.
    return "{}{}{}".format(RLI, text, PDI)
|
| 14 |
|
| 15 |
# Load model and tokeniser
|
| 16 |
print("Loading model and tokeniser...")
|
|
|
|
| 70 |
for sample in test_samples:
|
| 71 |
result = transliterate(sample["text"], sample["dialect"])
|
| 72 |
print(f"\n{sample['description']}:")
|
| 73 |
+
print(f" Syriac: {rtl(sample['text'])}")
|
| 74 |
print(f" Latin: {result}")
|
| 75 |
|
| 76 |
print("\n" + "=" * 50)
|
|
|
|
| 98 |
if text:
|
| 99 |
result = transliterate(text, dialect)
|
| 100 |
dialect_name = "East" if dialect == "east" else "West"
|
| 101 |
+
print(f" [{dialect_name}] {rtl(text)} → {result}")
|