crossroderick committed on
Commit
cf5dfba
·
1 Parent(s): 1b3bed7

Readme and test script updates

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. src/test_t5.py +12 -3
README.md CHANGED
@@ -87,7 +87,7 @@ Example output:
87
  Input:
88
  ܒܡܠܟܘܬܐ ܕܐܠܗܐ
89
 
90
- Output (East):
91
  b-malkutho d-aloho
92
  ```
93
 
 
87
  Input:
88
  ܒܡܠܟܘܬܐ ܕܐܠܗܐ
89
 
90
+ Output (West):
91
  b-malkutho d-aloho
92
  ```
93
 
src/test_t5.py CHANGED
@@ -1,7 +1,16 @@
1
  from transformers import AutoTokenizer, T5ForConditionalGeneration, pipeline
2
 
3
  # Path config (relative to project root)
4
- model_path = "checkpoints/stage1-final/"
 
 
 
 
 
 
 
 
 
5
 
6
  # Load model and tokeniser
7
  print("Loading model and tokeniser...")
@@ -61,7 +70,7 @@ print("=" * 50)
61
  for sample in test_samples:
62
  result = transliterate(sample["text"], sample["dialect"])
63
  print(f"\n{sample['description']}:")
64
- print(f" Syriac: {sample['text']}")
65
  print(f" Latin: {result}")
66
 
67
  print("\n" + "=" * 50)
@@ -89,4 +98,4 @@ while True:
89
  if text:
90
  result = transliterate(text, dialect)
91
  dialect_name = "East" if dialect == "east" else "West"
92
- print(f" [{dialect_name}] {text} → {result}")
 
1
  from transformers import AutoTokenizer, T5ForConditionalGeneration, pipeline
2
 
3
  # Path config (relative to project root)
4
+ model_path = "crossroderick/aramt5"
5
+
6
+ # Unicode directional formatting for RTL text (Syriac)
7
+ RLI = "\u2067" # Right-to-Left Isolate
8
+ PDI = "\u2069" # Pop Directional Isolate
9
+
10
+
11
+ def rtl(text: str) -> str:
12
+ """Wrap text in RTL isolate markers for correct terminal display."""
13
+ return f"{RLI}{text}{PDI}"
14
 
15
  # Load model and tokeniser
16
  print("Loading model and tokeniser...")
 
70
  for sample in test_samples:
71
  result = transliterate(sample["text"], sample["dialect"])
72
  print(f"\n{sample['description']}:")
73
+ print(f" Syriac: {rtl(sample['text'])}")
74
  print(f" Latin: {result}")
75
 
76
  print("\n" + "=" * 50)
 
98
  if text:
99
  result = transliterate(text, dialect)
100
  dialect_name = "East" if dialect == "east" else "West"
101
+ print(f" [{dialect_name}] {rtl(text)} → {result}")