Rogaton Claude committed on
Commit
7e78bf2
·
1 Parent(s): cc04472

Fix translation interface with correct megalaa models

Browse files

- Use correct model names: megalaa/coptic-english-translator & megalaa/english-coptic-translator
- Add trust_remote_code=True for custom pipeline code
- Implement virtual Coptic keyboard in Gradio layout
- Add dialect selection (Sahidic/Bohairic) with proper parameters
- Improve UI with better layout and examples

Fixes model loading errors and the missing keyboard interface.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +199 -170
app.py CHANGED
@@ -1,214 +1,243 @@
1
  #!/usr/bin/env python3
2
  """
3
  Coptic Translation Interface - Hugging Face Space
4
- Supports Coptic↔English translation using fine-tuned MEGALAA models
5
  """
6
 
7
  import gradio as gr
8
- import torch
9
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
10
-
11
- # Coptic-Greek character mappings for model preprocessing
12
- COPTIC_TO_GREEK = {
13
- "ⲁ": "α", "ⲃ": "β", "ⲅ": "γ", "ⲇ": "δ", "ⲉ": "ε", "ⲋ": "ϛ",
14
- "ⲍ": "ζ", "ⲏ": "η", "ⲑ": "θ", "ⲓ": "ι", "ⲕ": "κ", "ⲗ": "λ",
15
- "ⲙ": "μ", "ⲛ": "ν", "ⲝ": "ξ", "ⲟ": "ο", "ⲡ": "π", "ⲣ": "ρ",
16
- "ⲥ": "σ", "ⲧ": "τ", "ⲩ": "υ", "ⲫ": "φ", "ⲭ": "χ", "ⲯ": "ψ", "ⲱ": "ω",
17
- "ϣ": "ʃ", "ϥ": "f", "ϧ": "x", "ϩ": "h", "ϫ": "ɟ", "ϭ": "c", "ϯ": "ti",
18
- "Ⲁ": "Α", "Ⲃ": "Β", "Ⲅ": "Γ", "Ⲇ": "Δ", "Ⲉ": "Ε", "Ⲍ": "Ζ", "Ⲏ": "Η",
19
- "Ⲑ": "Θ", "Ⲓ": "Ι", "Ⲕ": "Κ", "Ⲗ": "Λ", "Ⲙ": "Μ", "Ⲛ": "Ν", "Ⲝ": "Ξ",
20
- "Ⲟ": "Ο", "Ⲡ": "Π", "Ⲣ": "Ρ", "Ⲥ": "Σ", "Ⲧ": "Τ", "Ⲩ": "Υ", "Ⲫ": "Φ",
21
- "Ⲭ": "Χ", "Ⲯ": "Ψ", "Ⲱ": "Ω", "Ϣ": "Ʃ", "Ϥ": "F", "Ϧ": "X", "Ϩ": "H",
22
- "Ϫ": "Ɉ", "Ϭ": "C", "Ϯ": "TI"
23
- }
24
-
25
- GREEK_TO_COPTIC = {v: k for k, v in COPTIC_TO_GREEK.items()}
26
-
27
- def greekify(coptic_text):
28
- """Convert Coptic Unicode to Greek transcription"""
29
- return "".join(COPTIC_TO_GREEK.get(c.lower(), c.lower()) for c in coptic_text)
30
-
31
- def degreekify(greek_text):
32
- """Convert Greek transcription back to Coptic Unicode"""
33
- result = []
34
- i = 0
35
- while i < len(greek_text):
36
- if i < len(greek_text) - 1 and greek_text[i:i+2].lower() == 'ti':
37
- result.append(GREEK_TO_COPTIC.get('ti', greek_text[i:i+2]))
38
- i += 2
39
- else:
40
- result.append(GREEK_TO_COPTIC.get(greek_text[i], greek_text[i]))
41
- i += 1
42
- return ''.join(result)
43
 
44
- # Model loading with caching
45
- coptic_to_english_model = None
46
- english_to_coptic_model = None
47
- device = "cuda" if torch.cuda.is_available() else "cpu"
48
 
49
  def load_coptic_to_english():
50
- global coptic_to_english_model
51
- if coptic_to_english_model is None:
52
- model_name = "Norelad/coptic-megalaa-finetuned"
53
- tokenizer = AutoTokenizer.from_pretrained(model_name)
54
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
55
- coptic_to_english_model = (tokenizer, model)
56
- return coptic_to_english_model
 
57
 
58
  def load_english_to_coptic():
59
- global english_to_coptic_model
60
- if english_to_coptic_model is None:
61
- model_name = "megalaa/english-coptic-translator"
62
- tokenizer = AutoTokenizer.from_pretrained(model_name)
63
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
64
- english_to_coptic_model = (tokenizer, model)
65
- return english_to_coptic_model
66
-
67
- def translate_coptic_to_english(text, dialect='cop-sa'):
 
68
  """Translate Coptic to English"""
 
 
 
69
  try:
70
- tokenizer, model = load_coptic_to_english()
71
-
72
- # Dialect tags
73
- DIALECT_TAGS = {'cop-sa': 'з', 'cop-bo': 'б', 'cop': 'з'}
74
- dialect_tag = DIALECT_TAGS.get(dialect, 'з')
75
-
76
- # Preprocess: Convert to Greek transcription and add dialect tag
77
- greek_input = greekify(text.lower())
78
- greek_input = f"{dialect_tag} {greek_input}"
79
-
80
- # Generate translation
81
- inputs = tokenizer(greek_input, return_tensors="pt", padding=True).to(device)
82
- outputs = model.generate(
83
- **inputs,
84
- max_new_tokens=128,
85
- num_beams=5,
86
- early_stopping=True
87
- )
88
 
89
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
90
  except Exception as e:
91
- return f"Translation error: {e}"
92
 
93
- def translate_english_to_coptic(text):
94
  """Translate English to Coptic"""
 
 
 
95
  try:
96
- tokenizer, model = load_english_to_coptic()
97
-
98
- # Generate translation
99
- inputs = tokenizer(text, return_tensors="pt", padding=True).to(device)
100
- outputs = model.generate(
101
- **inputs,
102
- max_new_tokens=128,
103
- num_beams=5,
104
- early_stopping=True
105
- )
106
 
107
- # Convert Greek output to Coptic
108
- greek_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
109
- return degreekify(greek_output)
110
  except Exception as e:
111
- return f"Translation error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  # Example texts
114
  COPTIC_EXAMPLES = [
115
- ("ϯⲛⲁⲃⲱⲕ ⲉⲡⲏⲓ", "I will go to the house"),
116
- ("ⲡⲉⲭⲣⲓⲥⲧⲟⲥ ⲡⲉ ⲡⲛⲟⲩⲧⲉ", "Christ is God"),
117
- ("ⲁⲓⲛⲁⲩ ⲉⲡⲣⲱⲙⲉ", "I saw the man"),
118
  ]
119
 
120
  ENGLISH_EXAMPLES = [
121
- "The Lord is good",
122
- "I am a teacher",
123
- "We give thanks to God",
124
  ]
125
 
126
- # Gradio Interface
127
  with gr.Blocks(title="Coptic Translation Interface", theme=gr.themes.Soft()) as demo:
128
  gr.Markdown("""
129
  # 🔮 Coptic Translation Interface
130
 
131
- Translate between Coptic and English using fine-tuned MEGALAA models:
132
- - **Coptic → English**: `Norelad/coptic-megalaa-finetuned`
133
  - **English → Coptic**: `megalaa/english-coptic-translator`
134
 
135
- Based on 50,000+ parallel sentences from CopticScriptorium corpus.
136
  """)
137
 
138
- with gr.Tab("Coptic → English"):
139
- with gr.Row():
140
- with gr.Column():
141
- cop_input = gr.Textbox(
142
- label="Coptic Text",
143
- placeholder="Enter Coptic text (Unicode)...",
144
- lines=5
145
- )
146
- cop_dialect = gr.Radio(
147
- choices=[("Sahidic", "cop-sa"), ("Bohairic", "cop-bo")],
148
- value="cop-sa",
149
- label="Dialect"
150
- )
151
- cop_translate_btn = gr.Button("Translate to English", variant="primary")
152
-
153
- with gr.Column():
154
- cop_output = gr.Textbox(
155
- label="English Translation",
156
- lines=5,
157
- interactive=False
158
- )
159
-
160
- gr.Examples(
161
- examples=[[ex[0], "cop-sa"] for ex in COPTIC_EXAMPLES],
162
- inputs=[cop_input, cop_dialect],
163
- outputs=cop_output,
164
- fn=translate_coptic_to_english,
165
- label="Example Coptic Texts"
166
- )
167
-
168
- cop_translate_btn.click(
169
- fn=translate_coptic_to_english,
170
- inputs=[cop_input, cop_dialect],
171
- outputs=cop_output
172
- )
173
-
174
- with gr.Tab("English → Coptic"):
175
- with gr.Row():
176
- with gr.Column():
177
- eng_input = gr.Textbox(
178
- label="English Text",
179
- placeholder="Enter English text...",
180
- lines=5
181
- )
182
- eng_translate_btn = gr.Button("Translate to Coptic", variant="primary")
183
-
184
- with gr.Column():
185
- eng_output = gr.Textbox(
186
- label="Coptic Translation",
187
- lines=5,
188
- interactive=False
189
- )
190
-
191
- gr.Examples(
192
- examples=[[ex] for ex in ENGLISH_EXAMPLES],
193
- inputs=eng_input,
194
- outputs=eng_output,
195
- fn=translate_english_to_coptic,
196
- label="Example English Texts"
197
- )
198
-
199
- eng_translate_btn.click(
200
- fn=translate_english_to_coptic,
201
- inputs=eng_input,
202
- outputs=eng_output
203
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
  gr.Markdown("""
206
  ---
207
  ### About
208
- This interface uses fine-tuned MarianMT models trained on the CopticScriptorium parallel corpus.
209
- The models support bidirectional translation between Sahidic/Bohairic Coptic and English.
210
 
211
- **Note**: For best results with Coptic input, use proper Unicode Coptic characters (U+2C80–U+2CFF).
 
 
 
 
 
212
  """)
213
 
214
  if __name__ == "__main__":
 
1
  #!/usr/bin/env python3
2
  """
3
  Coptic Translation Interface - Hugging Face Space
4
+ Supports Coptic↔English translation using megalaa models
5
  """
6
 
7
  import gradio as gr
8
+ from transformers import pipeline
9
+
10
+ # Coptic alphabet for virtual keyboard
11
+ COPTIC_LETTERS = [
12
+ 'ⲁ', 'ⲃ', 'ⲅ', 'ⲇ', 'ⲉ', 'ⲍ', 'ⲏ', 'ⲑ', 'ⲓ', 'ⲕ', 'ⲗ', 'ⲙ',
13
+ 'ⲛ', 'ⲝ', 'ⲟ', 'ⲡ', 'ⲣ', 'ⲥ', 'ⲧ', 'ⲩ', 'ⲫ', 'ⲭ', 'ⲯ', 'ⲱ',
14
+ 'ϣ', 'ϥ', 'ϧ', 'ϩ', 'ϫ', 'ϭ', 'ϯ'
15
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ # Model caching
18
+ coptic_to_english_pipe = None
19
+ english_to_coptic_pipe = None
 
20
 
21
  def load_coptic_to_english():
22
+ """Load Coptic → English translation pipeline"""
23
+ global coptic_to_english_pipe
24
+ if coptic_to_english_pipe is None:
25
+ coptic_to_english_pipe = pipeline(
26
+ model="megalaa/coptic-english-translator",
27
+ trust_remote_code=True
28
+ )
29
+ return coptic_to_english_pipe
30
 
31
  def load_english_to_coptic():
32
+ """Load English → Coptic translation pipeline"""
33
+ global english_to_coptic_pipe
34
+ if english_to_coptic_pipe is None:
35
+ english_to_coptic_pipe = pipeline(
36
+ model="megalaa/english-coptic-translator",
37
+ trust_remote_code=True
38
+ )
39
+ return english_to_coptic_pipe
40
+
41
+ def translate_coptic_to_english(text, dialect):
42
  """Translate Coptic to English"""
43
+ if not text or not text.strip():
44
+ return "Please enter Coptic text to translate."
45
+
46
  try:
47
+ pipe = load_coptic_to_english()
48
+
49
+ # Use from_bohairic parameter if Bohairic dialect selected
50
+ if dialect == "Bohairic":
51
+ result = pipe(text, from_bohairic=True)
52
+ else:
53
+ result = pipe(text)
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ return result['translation']
56
  except Exception as e:
57
+ return f"Translation error: {str(e)}"
58
 
59
+ def translate_english_to_coptic(text, dialect):
60
  """Translate English to Coptic"""
61
+ if not text or not text.strip():
62
+ return "Please enter English text to translate."
63
+
64
  try:
65
+ pipe = load_english_to_coptic()
66
+
67
+ # Use to_bohairic parameter if Bohairic dialect selected
68
+ if dialect == "Bohairic":
69
+ result = pipe(text, to_bohairic=True)
70
+ else:
71
+ result = pipe(text)
 
 
 
72
 
73
+ return result['translation']
 
 
74
  except Exception as e:
75
+ return f"Translation error: {str(e)}"
76
+
77
+ def add_letter(current_text, letter):
78
+ """Add a Coptic letter to the current text"""
79
+ return current_text + letter if current_text else letter
80
+
81
+ def add_space(current_text):
82
+ """Add a space to the current text"""
83
+ return current_text + " " if current_text else " "
84
+
85
+ def backspace(current_text):
86
+ """Remove last character from current text"""
87
+ return current_text[:-1] if current_text else ""
88
+
89
+ def clear_text():
90
+ """Clear all text"""
91
+ return ""
92
 
93
  # Example texts
94
  COPTIC_EXAMPLES = [
95
+ ["ϯⲛⲁⲃⲱⲕ ⲉⲡⲏⲓ", "Sahidic"],
96
+ ["ⲡⲉⲭⲣⲓⲥⲧⲟⲥ ⲡⲉ ⲡⲛⲟⲩⲧⲉ", "Sahidic"],
97
+ ["ⲁⲓⲛⲁⲩ ⲉⲡⲣⲱⲙⲉ", "Sahidic"],
98
  ]
99
 
100
  ENGLISH_EXAMPLES = [
101
+ ["The Lord is good", "Sahidic"],
102
+ ["I am a teacher", "Sahidic"],
103
+ ["We give thanks to God", "Sahidic"],
104
  ]
105
 
106
+ # Create Gradio Interface
107
  with gr.Blocks(title="Coptic Translation Interface", theme=gr.themes.Soft()) as demo:
108
  gr.Markdown("""
109
  # 🔮 Coptic Translation Interface
110
 
111
+ Translate between Coptic and English using specialized models from [megalaa](https://huggingface.co/megalaa):
112
+ - **Coptic → English**: `megalaa/coptic-english-translator`
113
  - **English → Coptic**: `megalaa/english-coptic-translator`
114
 
115
+ Based on neural machine translation models trained on Coptic-English parallel corpus.
116
  """)
117
 
118
+ with gr.Tabs():
119
+ # Tab 1: Coptic → English
120
+ with gr.TabItem("Coptic → English"):
121
+ gr.Markdown("### Translate Coptic text to English")
122
+
123
+ with gr.Row():
124
+ with gr.Column(scale=1):
125
+ cop_input = gr.Textbox(
126
+ label="Coptic Text",
127
+ placeholder="Enter Coptic text or use the virtual keyboard below...",
128
+ lines=8,
129
+ max_lines=15
130
+ )
131
+
132
+ cop_dialect = gr.Radio(
133
+ choices=["Sahidic", "Bohairic"],
134
+ value="Sahidic",
135
+ label="Coptic Dialect"
136
+ )
137
+
138
+ # Virtual Coptic Keyboard
139
+ with gr.Group():
140
+ gr.Markdown("**Virtual Coptic Keyboard**")
141
+
142
+ # Create keyboard in rows of 8
143
+ for i in range(0, len(COPTIC_LETTERS), 8):
144
+ with gr.Row():
145
+ for letter in COPTIC_LETTERS[i:i+8]:
146
+ btn = gr.Button(letter, size="sm", scale=1)
147
+ btn.click(
148
+ fn=lambda current, l=letter: add_letter(current, l),
149
+ inputs=[cop_input],
150
+ outputs=[cop_input]
151
+ )
152
+
153
+ with gr.Row():
154
+ space_btn = gr.Button("Space", size="sm", scale=2)
155
+ back_btn = gr.Button("⌫ Backspace", size="sm", scale=2)
156
+ clear_btn = gr.Button("Clear", size="sm", scale=1)
157
+
158
+ space_btn.click(fn=add_space, inputs=[cop_input], outputs=[cop_input])
159
+ back_btn.click(fn=backspace, inputs=[cop_input], outputs=[cop_input])
160
+ clear_btn.click(fn=clear_text, outputs=[cop_input])
161
+
162
+ cop_translate_btn = gr.Button("🔄 Translate to English", variant="primary", size="lg")
163
+
164
+ with gr.Column(scale=1):
165
+ cop_output = gr.Textbox(
166
+ label="English Translation",
167
+ lines=8,
168
+ max_lines=15,
169
+ interactive=False
170
+ )
171
+
172
+ gr.Examples(
173
+ examples=COPTIC_EXAMPLES,
174
+ inputs=[cop_input, cop_dialect],
175
+ outputs=cop_output,
176
+ fn=translate_coptic_to_english,
177
+ cache_examples=False,
178
+ label="📖 Example Coptic Texts"
179
+ )
180
+
181
+ cop_translate_btn.click(
182
+ fn=translate_coptic_to_english,
183
+ inputs=[cop_input, cop_dialect],
184
+ outputs=cop_output
185
+ )
186
+
187
+ # Tab 2: English → Coptic
188
+ with gr.TabItem("English → Coptic"):
189
+ gr.Markdown("### Translate English text to Coptic")
190
+
191
+ with gr.Row():
192
+ with gr.Column(scale=1):
193
+ eng_input = gr.Textbox(
194
+ label="English Text",
195
+ placeholder="Enter English text...",
196
+ lines=8,
197
+ max_lines=15
198
+ )
199
+
200
+ eng_dialect = gr.Radio(
201
+ choices=["Sahidic", "Bohairic"],
202
+ value="Sahidic",
203
+ label="Target Coptic Dialect"
204
+ )
205
+
206
+ eng_translate_btn = gr.Button("🔄 Translate to Coptic", variant="primary", size="lg")
207
+
208
+ with gr.Column(scale=1):
209
+ eng_output = gr.Textbox(
210
+ label="Coptic Translation",
211
+ lines=8,
212
+ max_lines=15,
213
+ interactive=False
214
+ )
215
+
216
+ gr.Examples(
217
+ examples=ENGLISH_EXAMPLES,
218
+ inputs=[eng_input, eng_dialect],
219
+ outputs=eng_output,
220
+ fn=translate_english_to_coptic,
221
+ cache_examples=False,
222
+ label="📖 Example English Texts"
223
+ )
224
+
225
+ eng_translate_btn.click(
226
+ fn=translate_english_to_coptic,
227
+ inputs=[eng_input, eng_dialect],
228
+ outputs=eng_output
229
+ )
230
 
231
  gr.Markdown("""
232
  ---
233
  ### About
 
 
234
 
235
+ This interface uses neural machine translation models trained on Coptic-English parallel corpus:
236
+ - **Models**: [megalaa/coptic-english-translator](https://huggingface.co/megalaa/coptic-english-translator) & [megalaa/english-coptic-translator](https://huggingface.co/megalaa/english-coptic-translator)
237
+ - **Dialects**: Supports both Sahidic (default) and Bohairic Coptic
238
+ - **Input**: Use proper Unicode Coptic characters (U+2C80–U+2CFF) or the virtual keyboard
239
+
240
+ **Research**: Based on work by Enis & Megalaa (2024) - "Ancient voices, modern technology: Low-resource neural machine translation for coptic texts"
241
  """)
242
 
243
  if __name__ == "__main__":