Rogaton Claude committed on
Commit ·
7e78bf2
1
Parent(s): cc04472
Fix translation interface with correct megalaa models
Browse files
- Use correct model names: megalaa/coptic-english-translator & megalaa/english-coptic-translator
- Add trust_remote_code=True for custom pipeline code
- Implement virtual Coptic keyboard in Gradio layout
- Add dialect selection (Sahidic/Bohairic) with proper parameters
- Improve UI with better layout and examples
Fixes model loading errors and missing keyboard interface.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
app.py
CHANGED
|
@@ -1,214 +1,243 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
Coptic Translation Interface - Hugging Face Space
|
| 4 |
-
Supports Coptic↔English translation using
|
| 5 |
"""
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
-
import
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
"ⲥ": "σ", "ⲧ": "τ", "ⲩ": "υ", "ⲫ": "φ", "ⲭ": "χ", "ⲯ": "ψ", "ⲱ": "ω",
|
| 17 |
-
"ϣ": "ʃ", "ϥ": "f", "ϧ": "x", "ϩ": "h", "ϫ": "ɟ", "ϭ": "c", "ϯ": "ti",
|
| 18 |
-
"Ⲁ": "Α", "Ⲃ": "Β", "Ⲅ": "Γ", "Ⲇ": "Δ", "Ⲉ": "Ε", "Ⲍ": "Ζ", "Ⲏ": "Η",
|
| 19 |
-
"Ⲑ": "Θ", "Ⲓ": "Ι", "Ⲕ": "Κ", "Ⲗ": "Λ", "Ⲙ": "Μ", "Ⲛ": "Ν", "Ⲝ": "Ξ",
|
| 20 |
-
"Ⲟ": "Ο", "Ⲡ": "Π", "Ⲣ": "Ρ", "Ⲥ": "Σ", "Ⲧ": "Τ", "Ⲩ": "Υ", "Ⲫ": "Φ",
|
| 21 |
-
"Ⲭ": "Χ", "Ⲯ": "Ψ", "Ⲱ": "Ω", "Ϣ": "Ʃ", "Ϥ": "F", "Ϧ": "X", "Ϩ": "H",
|
| 22 |
-
"Ϫ": "Ɉ", "Ϭ": "C", "Ϯ": "TI"
|
| 23 |
-
}
|
| 24 |
-
|
| 25 |
-
GREEK_TO_COPTIC = {v: k for k, v in COPTIC_TO_GREEK.items()}
|
| 26 |
-
|
| 27 |
-
def greekify(coptic_text):
    """Convert Coptic Unicode text to its lowercase Greek transcription.

    Each character is lowercased on its own and looked up in the
    module-level ``COPTIC_TO_GREEK`` table. Characters without an entry
    (spaces, punctuation, other scripts) are kept, lowercased.

    Args:
        coptic_text: String to transliterate.

    Returns:
        The transliterated string; ``""`` for empty input.
    """
    pieces = []
    for char in coptic_text:
        # Lowercase once per character and reuse it as both the lookup key
        # and the fallback value.
        lowered = char.lower()
        pieces.append(COPTIC_TO_GREEK.get(lowered, lowered))
    return "".join(pieces)
|
| 30 |
-
|
| 31 |
-
def degreekify(greek_text):
    """Convert a Greek transcription back to Coptic Unicode.

    The text is scanned left to right. The two-character sequence "ti"
    (in any letter case) is treated as one unit; every other character is
    looked up on its own in the module-level ``GREEK_TO_COPTIC`` table.
    Anything without a table entry is copied through unchanged.

    Args:
        greek_text: Transcribed string to convert.

    Returns:
        The converted string; ``""`` for empty input.
    """
    converted = []
    length = len(greek_text)
    pos = 0
    while pos < length:
        pair = greek_text[pos:pos + 2]
        if len(pair) == 2 and pair.lower() == "ti":
            # Try the digraph exactly as written first, so an upper-case
            # "TI" can reach its own table entry. Fall back to the
            # lowercase key, and finally to the raw text.
            coptic = GREEK_TO_COPTIC.get(pair)
            if coptic is None:
                coptic = GREEK_TO_COPTIC.get("ti", pair)
            converted.append(coptic)
            pos += 2
        else:
            char = greek_text[pos]
            converted.append(GREEK_TO_COPTIC.get(char, char))
            pos += 1
    return "".join(converted)
|
| 43 |
|
| 44 |
-
# Model
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 48 |
|
| 49 |
def load_coptic_to_english():
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
| 57 |
|
| 58 |
def load_english_to_coptic():
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
| 68 |
"""Translate Coptic to English"""
|
|
|
|
|
|
|
|
|
|
| 69 |
try:
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
#
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
greek_input = greekify(text.lower())
|
| 78 |
-
greek_input = f"{dialect_tag} {greek_input}"
|
| 79 |
-
|
| 80 |
-
# Generate translation
|
| 81 |
-
inputs = tokenizer(greek_input, return_tensors="pt", padding=True).to(device)
|
| 82 |
-
outputs = model.generate(
|
| 83 |
-
**inputs,
|
| 84 |
-
max_new_tokens=128,
|
| 85 |
-
num_beams=5,
|
| 86 |
-
early_stopping=True
|
| 87 |
-
)
|
| 88 |
|
| 89 |
-
return
|
| 90 |
except Exception as e:
|
| 91 |
-
return f"Translation error: {e}"
|
| 92 |
|
| 93 |
-
def translate_english_to_coptic(text):
|
| 94 |
"""Translate English to Coptic"""
|
|
|
|
|
|
|
|
|
|
| 95 |
try:
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
#
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
num_beams=5,
|
| 104 |
-
early_stopping=True
|
| 105 |
-
)
|
| 106 |
|
| 107 |
-
|
| 108 |
-
greek_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 109 |
-
return degreekify(greek_output)
|
| 110 |
except Exception as e:
|
| 111 |
-
return f"Translation error: {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
# Example texts
|
| 114 |
COPTIC_EXAMPLES = [
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
]
|
| 119 |
|
| 120 |
ENGLISH_EXAMPLES = [
|
| 121 |
-
"The Lord is good",
|
| 122 |
-
"I am a teacher",
|
| 123 |
-
"We give thanks to God",
|
| 124 |
]
|
| 125 |
|
| 126 |
-
# Gradio Interface
|
| 127 |
with gr.Blocks(title="Coptic Translation Interface", theme=gr.themes.Soft()) as demo:
|
| 128 |
gr.Markdown("""
|
| 129 |
# 🔮 Coptic Translation Interface
|
| 130 |
|
| 131 |
-
Translate between Coptic and English using
|
| 132 |
-
- **Coptic → English**: `
|
| 133 |
- **English → Coptic**: `megalaa/english-coptic-translator`
|
| 134 |
|
| 135 |
-
Based on
|
| 136 |
""")
|
| 137 |
|
| 138 |
-
with gr.
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
gr.Markdown("""
|
| 206 |
---
|
| 207 |
### About
|
| 208 |
-
This interface uses fine-tuned MarianMT models trained on the CopticScriptorium parallel corpus.
|
| 209 |
-
The models support bidirectional translation between Sahidic/Bohairic Coptic and English.
|
| 210 |
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
""")
|
| 213 |
|
| 214 |
if __name__ == "__main__":
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
Coptic Translation Interface - Hugging Face Space
|
| 4 |
+
Supports Coptic↔English translation using megalaa models
|
| 5 |
"""
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
+
from transformers import pipeline
|
| 9 |
+
|
| 10 |
+
# Coptic alphabet for the virtual keyboard: lowercase letters, listed in the
# order in which the keyboard buttons are created.
COPTIC_LETTERS = [
    'ⲁ', 'ⲃ', 'ⲅ', 'ⲇ', 'ⲉ', 'ⲍ', 'ⲏ', 'ⲑ', 'ⲓ', 'ⲕ', 'ⲗ', 'ⲙ',
    'ⲛ', 'ⲝ', 'ⲟ', 'ⲡ', 'ⲣ', 'ⲥ', 'ⲧ', 'ⲩ', 'ⲫ', 'ⲭ', 'ⲯ', 'ⲱ',
    'ϣ', 'ϥ', 'ϧ', 'ϩ', 'ϫ', 'ϭ', 'ϯ',
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
+
# Model caching: each pipeline starts out unset, is created on first use by
# its loader function, and is then reused for later requests.
coptic_to_english_pipe = None
english_to_coptic_pipe = None
|
|
|
|
| 20 |
|
| 21 |
def load_coptic_to_english():
    """Return the Coptic → English translation pipeline, creating it once.

    The pipeline is stored in the module-level ``coptic_to_english_pipe``;
    later calls return that same object instead of loading the model again.

    Returns:
        The pipeline object for ``megalaa/coptic-english-translator``.
    """
    global coptic_to_english_pipe
    if coptic_to_english_pipe is None:
        # trust_remote_code=True allows the pipeline code shipped in the
        # model repository to be executed when the pipeline is built.
        coptic_to_english_pipe = pipeline(
            model="megalaa/coptic-english-translator",
            trust_remote_code=True,
        )
    return coptic_to_english_pipe
|
| 30 |
|
| 31 |
def load_english_to_coptic():
    """Return the English → Coptic translation pipeline, creating it once.

    The pipeline is stored in the module-level ``english_to_coptic_pipe``;
    later calls return that same object instead of loading the model again.

    Returns:
        The pipeline object for ``megalaa/english-coptic-translator``.
    """
    global english_to_coptic_pipe
    if english_to_coptic_pipe is None:
        # trust_remote_code=True allows the pipeline code shipped in the
        # model repository to be executed when the pipeline is built.
        english_to_coptic_pipe = pipeline(
            model="megalaa/english-coptic-translator",
            trust_remote_code=True,
        )
    return english_to_coptic_pipe
|
| 40 |
+
|
| 41 |
+
def translate_coptic_to_english(text, dialect):
    """Translate Coptic to English.

    Args:
        text: Coptic input. ``None``, empty, or whitespace-only input is
            answered with a prompt message and no model is loaded.
        dialect: ``"Bohairic"`` passes ``from_bohairic=True`` to the
            pipeline; any other value calls the pipeline with its defaults.

    Returns:
        The ``'translation'`` entry of the pipeline result, or a
        ``"Translation error: ..."`` message if loading or translating
        raised an exception.
    """
    if not text or not text.strip():
        return "Please enter Coptic text to translate."

    try:
        pipe = load_coptic_to_english()

        # Only Bohairic needs an extra flag; build the keyword arguments
        # once so the pipeline is called from a single place.
        options = {"from_bohairic": True} if dialect == "Bohairic" else {}
        result = pipe(text, **options)

        return result['translation']
    except Exception as e:
        # UI boundary: report the failure in the output box rather than
        # letting the exception propagate out of the click handler.
        return f"Translation error: {e}"
|
| 58 |
|
| 59 |
+
def translate_english_to_coptic(text, dialect):
    """Translate English to Coptic.

    Args:
        text: English input. ``None``, empty, or whitespace-only input is
            answered with a prompt message and no model is loaded.
        dialect: ``"Bohairic"`` passes ``to_bohairic=True`` to the
            pipeline; any other value calls the pipeline with its defaults.

    Returns:
        The ``'translation'`` entry of the pipeline result, or a
        ``"Translation error: ..."`` message if loading or translating
        raised an exception.
    """
    if not text or not text.strip():
        return "Please enter English text to translate."

    try:
        pipe = load_english_to_coptic()

        # Only Bohairic needs an extra flag; build the keyword arguments
        # once so the pipeline is called from a single place.
        options = {"to_bohairic": True} if dialect == "Bohairic" else {}
        result = pipe(text, **options)

        return result['translation']
    except Exception as e:
        # UI boundary: report the failure in the output box rather than
        # letting the exception propagate out of the click handler.
        return f"Translation error: {e}"
|
| 76 |
+
|
| 77 |
+
def add_letter(current_text, letter):
    """Add a Coptic letter to the end of the current text.

    Args:
        current_text: Text already in the input box; ``None`` or ``""`` is
            treated as empty.
        letter: The string to append.

    Returns:
        ``current_text`` followed by ``letter``.
    """
    return (current_text or "") + letter
|
| 80 |
+
|
| 81 |
+
def add_space(current_text):
    """Add a single space to the end of the current text.

    Args:
        current_text: Text already in the input box; ``None`` or ``""`` is
            treated as empty.

    Returns:
        ``current_text`` followed by one space.
    """
    return (current_text or "") + " "
|
| 84 |
+
|
| 85 |
+
def backspace(current_text):
    """Remove the last character from the current text.

    Exactly one code point is dropped, so a trailing combining mark is
    removed on its own and its base letter stays.

    Args:
        current_text: Text already in the input box; may be ``None``.

    Returns:
        The text without its final character; ``""`` when there is nothing
        to delete.
    """
    if not current_text:
        return ""
    return current_text[:-1]
|
| 88 |
+
|
| 89 |
+
def clear_text():
    """Return the empty string used to reset the input box."""
    return ""
|
| 92 |
|
| 93 |
# Example texts. Each row is [input text, dialect], matching the two input
# components that the examples widgets fill in.
COPTIC_EXAMPLES = [
    ["ϯⲛⲁⲃⲱⲕ ⲉⲡⲏⲓ", "Sahidic"],
    ["ⲡⲉⲭⲣⲓⲥⲧⲟⲥ ⲡⲉ ⲡⲛⲟⲩⲧⲉ", "Sahidic"],
    ["ⲁⲓⲛⲁⲩ ⲉⲡⲣⲱⲙⲉ", "Sahidic"],
]

ENGLISH_EXAMPLES = [
    ["The Lord is good", "Sahidic"],
    ["I am a teacher", "Sahidic"],
    ["We give thanks to God", "Sahidic"],
]
|
| 105 |
|
| 106 |
+
# Create Gradio Interface
|
| 107 |
with gr.Blocks(title="Coptic Translation Interface", theme=gr.themes.Soft()) as demo:
|
| 108 |
gr.Markdown("""
|
| 109 |
# 🔮 Coptic Translation Interface
|
| 110 |
|
| 111 |
+
Translate between Coptic and English using specialized models from [megalaa](https://huggingface.co/megalaa):
|
| 112 |
+
- **Coptic → English**: `megalaa/coptic-english-translator`
|
| 113 |
- **English → Coptic**: `megalaa/english-coptic-translator`
|
| 114 |
|
| 115 |
+
Based on neural machine translation models trained on Coptic-English parallel corpus.
|
| 116 |
""")
|
| 117 |
|
| 118 |
+
with gr.Tabs():
|
| 119 |
+
# Tab 1: Coptic → English
|
| 120 |
+
with gr.TabItem("Coptic → English"):
|
| 121 |
+
gr.Markdown("### Translate Coptic text to English")
|
| 122 |
+
|
| 123 |
+
with gr.Row():
|
| 124 |
+
with gr.Column(scale=1):
|
| 125 |
+
cop_input = gr.Textbox(
|
| 126 |
+
label="Coptic Text",
|
| 127 |
+
placeholder="Enter Coptic text or use the virtual keyboard below...",
|
| 128 |
+
lines=8,
|
| 129 |
+
max_lines=15
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
cop_dialect = gr.Radio(
|
| 133 |
+
choices=["Sahidic", "Bohairic"],
|
| 134 |
+
value="Sahidic",
|
| 135 |
+
label="Coptic Dialect"
|
| 136 |
+
)
|
| 137 |
+
|
| 138 |
+
# Virtual Coptic Keyboard
|
| 139 |
+
with gr.Group():
|
| 140 |
+
gr.Markdown("**Virtual Coptic Keyboard**")
|
| 141 |
+
|
| 142 |
+
# Create keyboard in rows of 8
|
| 143 |
+
for i in range(0, len(COPTIC_LETTERS), 8):
|
| 144 |
+
with gr.Row():
|
| 145 |
+
for letter in COPTIC_LETTERS[i:i+8]:
|
| 146 |
+
btn = gr.Button(letter, size="sm", scale=1)
|
| 147 |
+
btn.click(
|
| 148 |
+
fn=lambda current, l=letter: add_letter(current, l),
|
| 149 |
+
inputs=[cop_input],
|
| 150 |
+
outputs=[cop_input]
|
| 151 |
+
)
|
| 152 |
+
|
| 153 |
+
with gr.Row():
|
| 154 |
+
space_btn = gr.Button("Space", size="sm", scale=2)
|
| 155 |
+
back_btn = gr.Button("⌫ Backspace", size="sm", scale=2)
|
| 156 |
+
clear_btn = gr.Button("Clear", size="sm", scale=1)
|
| 157 |
+
|
| 158 |
+
space_btn.click(fn=add_space, inputs=[cop_input], outputs=[cop_input])
|
| 159 |
+
back_btn.click(fn=backspace, inputs=[cop_input], outputs=[cop_input])
|
| 160 |
+
clear_btn.click(fn=clear_text, outputs=[cop_input])
|
| 161 |
+
|
| 162 |
+
cop_translate_btn = gr.Button("🔄 Translate to English", variant="primary", size="lg")
|
| 163 |
+
|
| 164 |
+
with gr.Column(scale=1):
|
| 165 |
+
cop_output = gr.Textbox(
|
| 166 |
+
label="English Translation",
|
| 167 |
+
lines=8,
|
| 168 |
+
max_lines=15,
|
| 169 |
+
interactive=False
|
| 170 |
+
)
|
| 171 |
+
|
| 172 |
+
gr.Examples(
|
| 173 |
+
examples=COPTIC_EXAMPLES,
|
| 174 |
+
inputs=[cop_input, cop_dialect],
|
| 175 |
+
outputs=cop_output,
|
| 176 |
+
fn=translate_coptic_to_english,
|
| 177 |
+
cache_examples=False,
|
| 178 |
+
label="📖 Example Coptic Texts"
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
cop_translate_btn.click(
|
| 182 |
+
fn=translate_coptic_to_english,
|
| 183 |
+
inputs=[cop_input, cop_dialect],
|
| 184 |
+
outputs=cop_output
|
| 185 |
+
)
|
| 186 |
+
|
| 187 |
+
# Tab 2: English → Coptic
|
| 188 |
+
with gr.TabItem("English → Coptic"):
|
| 189 |
+
gr.Markdown("### Translate English text to Coptic")
|
| 190 |
+
|
| 191 |
+
with gr.Row():
|
| 192 |
+
with gr.Column(scale=1):
|
| 193 |
+
eng_input = gr.Textbox(
|
| 194 |
+
label="English Text",
|
| 195 |
+
placeholder="Enter English text...",
|
| 196 |
+
lines=8,
|
| 197 |
+
max_lines=15
|
| 198 |
+
)
|
| 199 |
+
|
| 200 |
+
eng_dialect = gr.Radio(
|
| 201 |
+
choices=["Sahidic", "Bohairic"],
|
| 202 |
+
value="Sahidic",
|
| 203 |
+
label="Target Coptic Dialect"
|
| 204 |
+
)
|
| 205 |
+
|
| 206 |
+
eng_translate_btn = gr.Button("🔄 Translate to Coptic", variant="primary", size="lg")
|
| 207 |
+
|
| 208 |
+
with gr.Column(scale=1):
|
| 209 |
+
eng_output = gr.Textbox(
|
| 210 |
+
label="Coptic Translation",
|
| 211 |
+
lines=8,
|
| 212 |
+
max_lines=15,
|
| 213 |
+
interactive=False
|
| 214 |
+
)
|
| 215 |
+
|
| 216 |
+
gr.Examples(
|
| 217 |
+
examples=ENGLISH_EXAMPLES,
|
| 218 |
+
inputs=[eng_input, eng_dialect],
|
| 219 |
+
outputs=eng_output,
|
| 220 |
+
fn=translate_english_to_coptic,
|
| 221 |
+
cache_examples=False,
|
| 222 |
+
label="📖 Example English Texts"
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
eng_translate_btn.click(
|
| 226 |
+
fn=translate_english_to_coptic,
|
| 227 |
+
inputs=[eng_input, eng_dialect],
|
| 228 |
+
outputs=eng_output
|
| 229 |
+
)
|
| 230 |
|
| 231 |
gr.Markdown("""
|
| 232 |
---
|
| 233 |
### About
|
|
|
|
|
|
|
| 234 |
|
| 235 |
+
This interface uses neural machine translation models trained on Coptic-English parallel corpus:
|
| 236 |
+
- **Models**: [megalaa/coptic-english-translator](https://huggingface.co/megalaa/coptic-english-translator) & [megalaa/english-coptic-translator](https://huggingface.co/megalaa/english-coptic-translator)
|
| 237 |
+
- **Dialects**: Supports both Sahidic (default) and Bohairic Coptic
|
| 238 |
+
- **Input**: Use proper Unicode Coptic characters (U+2C80–U+2CFF) or the virtual keyboard
|
| 239 |
+
|
| 240 |
+
**Research**: Based on work by Enis & Megalaa (2024) - "Ancient voices, modern technology: Low-resource neural machine translation for coptic texts"
|
| 241 |
""")
|
| 242 |
|
| 243 |
if __name__ == "__main__":
|