crossroderick
/

aramt5

Text Generation

Classical Syriac

text2text-generation

transliteration

Eval Results (legacy)

Model card · Files and versions

crossroderick committed on Mar 17

Commit

cf5dfba

·

1 Parent(s): 1b3bed7

Readme and test script updates

Files changed (2) hide show

README.md +1 -1
src/test_t5.py +12 -3

README.md CHANGED Viewed

@@ -87,7 +87,7 @@ Example output:
 Input:
 ܒܡܠܟܘܬܐ ܕܐܠܗܐ
-Output (East):
 b-malkutho d-aloho
 ```

 Input:
 ܒܡܠܟܘܬܐ ܕܐܠܗܐ
+Output (West):
 b-malkutho d-aloho
 ```

src/test_t5.py CHANGED Viewed

@@ -1,7 +1,16 @@
 from transformers import AutoTokenizer, T5ForConditionalGeneration, pipeline
 # Path config (relative to project root)
-model_path = "checkpoints/stage1-final/"
 # Load model and tokeniser
 print("Loading model and tokeniser...")
@@ -61,7 +70,7 @@ print("=" * 50)
 for sample in test_samples:
     result = transliterate(sample["text"], sample["dialect"])
     print(f"\n{sample['description']}:")
-    print(f"  Syriac: {sample['text']}")
     print(f"  Latin:  {result}")
 print("\n" + "=" * 50)
@@ -89,4 +98,4 @@ while True:
     if text:
         result = transliterate(text, dialect)
         dialect_name = "East" if dialect == "east" else "West"
-        print(f"  [{dialect_name}] {text} → {result}")

 from transformers import AutoTokenizer, T5ForConditionalGeneration, pipeline
 # Path config (relative to project root)
+model_path = "crossroderick/aramt5"
+# Unicode directional formatting for RTL text (Syriac)
+RLI = "\u2067"  # Right-to-Left Isolate
+PDI = "\u2069"  # Pop Directional Isolate
+def rtl(text: str) -> str:
+    """Wrap text in RTL isolate markers for correct terminal display."""
+    return f"{RLI}{text}{PDI}"
 # Load model and tokeniser
 print("Loading model and tokeniser...")
 for sample in test_samples:
     result = transliterate(sample["text"], sample["dialect"])
     print(f"\n{sample['description']}:")
+    print(f"  Syriac: {rtl(sample['text'])}")
     print(f"  Latin:  {result}")
 print("\n" + "=" * 50)
     if text:
         result = transliterate(text, dialect)
         dialect_name = "East" if dialect == "east" else "West"
+        print(f"  [{dialect_name}] {rtl(text)} → {result}")