Rogaton
Fix Gradio 6.0 compatibility issues
84b26b8
#!/usr/bin/env python3
"""
Coptic Translation Interface - Hugging Face Space
Supports Coptic↔English translation using megalaa models
"""
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
# Letters offered by the virtual keyboard, in the order the buttons are created.
# Each character of the concatenated literals below becomes one list entry.
COPTIC_LETTERS = list(
    "ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙ"
    "ⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱ"
    "ϣϥϧϩϫϭϯ"
)
# Coptic-Greek character mappings (from handler.py)
# Keys are lowercase Coptic letters; values are the transcription symbols fed
# to / produced by the translation models. All values are distinct, so the
# table can be inverted without losing entries.
COPTIC_TO_GREEK = {
    "ⲁ": "α", "ⲃ": "β", "ⲅ": "γ", "ⲇ": "δ", "ⲉ": "ε", "ⲋ": "ϛ",
    "ⲍ": "ζ", "ⲏ": "η", "ⲑ": "θ", "ⲓ": "ι", "ⲕ": "κ", "ⲗ": "λ",
    "ⲙ": "μ", "ⲛ": "ν", "ⲝ": "ξ", "ⲟ": "ο", "ⲡ": "π", "ⲣ": "ρ",
    "ⲥ": "σ", "ⲧ": "τ", "ⲩ": "υ", "ⲫ": "φ", "ⲭ": "χ", "ⲯ": "ψ", "ⲱ": "ω",
    "ϣ": "ʃ", "ϥ": "f", "ϧ": "x", "ϩ": "h", "ϫ": "ɟ", "ϭ": "c", "ϯ": "ti",
}
# Inverse table used to turn model output back into Coptic Unicode.
GREEK_TO_COPTIC = {v: k for k, v in COPTIC_TO_GREEK.items()}


def greekify(coptic_text):
    """Convert Coptic Unicode to Greek transcription.

    Every character is lowercased first; characters without an entry in
    ``COPTIC_TO_GREEK`` are passed through (lowercased) unchanged.
    """
    return "".join(
        COPTIC_TO_GREEK.get(ch, ch) for ch in map(str.lower, coptic_text)
    )


def degreekify(greek_text):
    """Convert Greek transcription back to Coptic Unicode.

    The two-letter sequence "ti" (the only multi-character transcription)
    is matched first; every other character is looked up on its own.
    Lookups are case-insensitive for single characters as well as for the
    digraph, so capitalised output such as a sentence-initial "Α" is still
    converted. Characters with no mapping are returned exactly as given.
    """
    result = []
    i = 0
    length = len(greek_text)
    while i < length:
        pair = greek_text[i:i + 2]
        # A two-character slice can only equal "ti" when a next char exists,
        # so no separate bounds check is needed.
        if pair.lower() == "ti":
            result.append(GREEK_TO_COPTIC.get("ti", pair))
            i += 2
            continue
        ch = greek_text[i]
        # Table keys are all lowercase; fall back to the original character
        # (not its lowercase form) when nothing matches.
        result.append(GREEK_TO_COPTIC.get(ch.lower(), ch))
        i += 1
    return "".join(result)
# Model caching
# Both slots start empty and are filled with a (tokenizer, model) tuple the
# first time the corresponding loader runs.
coptic_to_english_model = english_to_coptic_model = None
# Use the GPU whenever PyTorch reports one, otherwise stay on the CPU.
device = "cpu" if not torch.cuda.is_available() else "cuda"
def load_coptic_to_english():
    """Return the Coptic → English ``(tokenizer, model)`` pair.

    The pair is created on the first call, moved to ``device`` and stored in
    the module-level ``coptic_to_english_model``; later calls return the
    stored tuple.
    """
    global coptic_to_english_model
    if coptic_to_english_model is not None:
        return coptic_to_english_model
    repo_id = "megalaa/coptic-english-translator"
    tok = AutoTokenizer.from_pretrained(repo_id)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
    seq2seq = seq2seq.to(device)
    coptic_to_english_model = (tok, seq2seq)
    return coptic_to_english_model
def load_english_to_coptic():
    """Return the English → Coptic ``(tokenizer, model)`` pair.

    The pair is created on the first call, moved to ``device`` and stored in
    the module-level ``english_to_coptic_model``; later calls return the
    stored tuple.
    """
    global english_to_coptic_model
    if english_to_coptic_model is not None:
        return english_to_coptic_model
    repo_id = "megalaa/english-coptic-translator"
    tok = AutoTokenizer.from_pretrained(repo_id)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
    seq2seq = seq2seq.to(device)
    english_to_coptic_model = (tok, seq2seq)
    return english_to_coptic_model
def translate_coptic_to_english(text, dialect):
    """Translate Coptic text to English and return the result as a string.

    ``dialect`` selects the tag prepended to the model input: "Bohairic"
    gets the Bohairic tag, anything else the Sahidic tag. Blank input
    returns a prompt message; any exception raised while loading or running
    the model is reported as a "Translation error: ..." string instead of
    propagating, so the UI always receives text.
    """
    if not (text and text.strip()):
        return "Please enter Coptic text to translate."
    try:
        tokenizer, model = load_coptic_to_english()
        # Dialect tag (from handler.py) followed by the Greek transcription
        # of the Coptic input.
        dialect_tag = "б " if dialect == "Bohairic" else "з "
        model_input = dialect_tag + greekify(text)
        encoded = tokenizer(model_input, return_tensors="pt", padding=True).to(device)
        generated = model.generate(
            **encoded,
            max_new_tokens=128,
            num_beams=5,
            early_stopping=True,
        )
        # Only the first returned sequence is decoded.
        return tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as e:
        return "Translation error: " + str(e)
def translate_english_to_coptic(text, dialect):
    """Translate English text to Coptic and return the result as a string.

    ``dialect`` selects the tag prepended to the model input: "Bohairic"
    gets the Bohairic tag, anything else the Sahidic tag. The decoded model
    output is passed through ``degreekify`` to obtain Coptic Unicode. Blank
    input returns a prompt message; any exception is reported as a
    "Translation error: ..." string instead of propagating.
    """
    if not (text and text.strip()):
        return "Please enter English text to translate."
    try:
        tokenizer, model = load_english_to_coptic()
        dialect_tag = "б " if dialect == "Bohairic" else "з "
        model_input = dialect_tag + text
        encoded = tokenizer(model_input, return_tensors="pt", padding=True).to(device)
        generated = model.generate(
            **encoded,
            max_new_tokens=128,
            num_beams=5,
            early_stopping=True,
        )
        # Only the first returned sequence is decoded, then mapped back
        # from the Greek transcription to Coptic letters.
        greek_output = tokenizer.decode(generated[0], skip_special_tokens=True)
        return degreekify(greek_output)
    except Exception as e:
        return "Translation error: " + str(e)
def add_letter(current_text, letter):
    """Return ``current_text`` with ``letter`` appended.

    When ``current_text`` is empty or ``None`` the letter alone is returned.
    """
    if not current_text:
        return letter
    return current_text + letter
def add_space(current_text):
    """Return ``current_text`` followed by one space (just a space if it is empty or ``None``)."""
    if not current_text:
        return " "
    return current_text + " "
def backspace(current_text):
    """Return ``current_text`` without its final character ("" if it is empty or ``None``)."""
    if not current_text:
        return ""
    return current_text[:-1]
def clear_text():
    """Return the empty string used to reset a textbox."""
    cleared = ""
    return cleared
# Load comprehensive test corpus
import json
from pathlib import Path
def load_test_corpus():
    """Read ``coptic_test_corpus.json`` from the directory containing this file.

    Returns the decoded JSON content, or ``None`` when the file is absent.
    """
    corpus_path = Path(__file__).parent / "coptic_test_corpus.json"
    if not corpus_path.exists():
        return None
    with corpus_path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
# Example texts organized by category
# Every entry is a [text, dialect] pair, matching the two-component
# `inputs=[<textbox>, <dialect radio>]` lists the gr.Examples widgets use.
# The trailing English glosses are the original author's notes; they are not
# verified anywhere in this file.
# SAHIDIC EXAMPLES
# Short single-clause sentences.
COPTIC_EXAMPLES_SIMPLE = [
["ⲁⲩⲱ ⲁϥⲙⲟⲩⲧⲉ ⲉⲣⲟϥ", "Sahidic"], # and he called him
["ⲁⲛⲟⲕ ⲡⲉ ⲡⲛⲟⲩⲧⲉ ⲙⲡⲉⲕⲉⲓⲱⲧ", "Sahidic"], # I am the God of your father
["ⲙⲡⲣⲣ ϩⲟⲧⲉ", "Sahidic"], # Do not be afraid
["ⲡϫⲟⲉⲓⲥ ⲡⲉ ⲡⲁⲛⲟⲩⲧⲉ", "Sahidic"], # The Lord is my God
["ⲁϥⲃⲱⲕ ⲉϩⲣⲁⲓ ⲉⲡⲉⲣⲡⲉ", "Sahidic"], # he went up to the temple
]
# Longer multi-clause sentences.
COPTIC_EXAMPLES_COMPLEX = [
["ⲁⲩⲱ ⲛⲧⲉⲣⲉϥⲛⲁⲩ ⲉⲡⲙⲏⲏϣⲉ ⲁϥϣⲡϩⲧⲏϥ ⲉϩⲣⲁⲓ ⲉϫⲱⲟⲩ", "Sahidic"], # when he saw the crowd
["ⲉϣⲱⲡⲉ ⲇⲉ ⲁⲩⲛⲁⲩ ⲉⲣⲟϥ ⲉϥⲙⲟⲟϣⲉ ϩⲓϫⲛ ⲧⲉⲑⲁⲗⲁⲥⲥⲁ ⲁⲩϣⲧⲟⲣⲧⲣ", "Sahidic"], # when they saw him walking
["ⲁⲓⲉⲓ ⲅⲁⲣ ⲉⲙⲟⲩⲧⲉ ⲁⲛ ⲉⲛⲇⲓⲕⲁⲓⲟⲥ ⲁⲗⲗⲁ ⲛⲣⲉϥⲣⲛⲟⲃⲉ", "Sahidic"], # I came not to call the righteous
]
# Passages made of several sentences (note the "·" punctuation marks).
COPTIC_EXAMPLES_TEXTS = [
["ⲛⲉⲩⲛⲟⲩⲙⲏⲏϣⲉ ⲇⲉ ⲛϣⲱⲛⲉ ⲉⲩⲛⲕⲟⲧⲕ ϩⲙ ⲡⲙⲁ ⲉⲧⲙⲙⲁⲩ· ⲛϩⲁⲛⲃⲗⲗⲉ ⲙⲛ ⲛϩⲁⲛϭⲁⲗⲉ ⲙⲛ ⲛϣⲟⲩⲱⲟⲩ·", "Sahidic"], # Healing at the pool
["ⲉⲓⲥ ⲡⲉⲧϫⲟ ⲁϥⲉⲓ ⲉⲃⲟⲗ ⲉϫⲟ· ⲁⲩⲱ ⲛⲧⲉⲣⲉϥϫⲟ ϩⲟⲓⲛⲉ ⲙⲉⲛ ⲁⲩϩⲉ ϩⲁⲧⲏ ⲧⲉϩⲓⲏ·", "Sahidic"], # The Sower parable
]
# BOHAIRIC EXAMPLES
# Same three categories as above, tagged "Bohairic".
BOHAIRIC_EXAMPLES_SIMPLE = [
["ⲟⲩⲟϩ ⲁϥⲙⲟⲩϯ ⲉⲣⲟϥ", "Bohairic"], # and he called him
["ⲁⲛⲟⲕ ⲡⲉ ⲫϯ ⲛⲧⲉ ⲡⲉⲕⲓⲱⲧ", "Bohairic"], # I am the God of your father
["ⲙⲡⲉⲣⲉⲣϩⲟϯ", "Bohairic"], # Do not be afraid
["ⲡϭⲟⲓⲥ ⲡⲉ ⲡⲁⲛⲟⲩϯ", "Bohairic"], # The Lord is my God
["ⲁϥϣⲉⲛⲁϥ ⲉⲡϣⲱⲓ ⲉⲡⲓⲉⲣⲫⲉⲓ", "Bohairic"], # he went up to the temple
]
BOHAIRIC_EXAMPLES_COMPLEX = [
["ⲟⲩⲟϩ ⲉⲧⲁϥⲛⲁⲩ ⲉⲡⲓⲙⲏϣ ⲁϥϣⲉⲛϩⲏⲧ ϧⲁⲣⲱⲟⲩ", "Bohairic"], # when he saw the crowd
["ⲡϭⲟⲓⲥ ⲡⲉⲧⲁⲙⲟⲛⲓ", "Bohairic"], # The Lord is my shepherd (Psalm 23:1)
]
BOHAIRIC_EXAMPLES_TEXTS = [
["ⲛⲉ ⲟⲩⲟⲛ ⲟⲩⲙⲏϣ ⲛϣⲱⲛⲓ ⲉⲩⲉⲛⲕⲟⲧ ϧⲉⲛ ⲡⲓⲙⲁ ⲉⲧⲉⲙⲙⲁⲩ· ϩⲁⲛⲃⲉⲗⲗⲉⲩ ⲛⲉⲙ ϩⲁⲛϭⲁⲗⲉⲩ ⲛⲉⲙ ϩⲁⲛϣⲁⲩⲟⲩⲱⲟⲩ·", "Bohairic"], # Healing at the pool (Bohairic)
]
# English inputs for the English → Coptic tab; here the second element is the
# target dialect. All entries use "Sahidic".
ENGLISH_EXAMPLES = [
["The Lord is good", "Sahidic"],
["I am a teacher", "Sahidic"],
["We give thanks to God", "Sahidic"],
["Do not be afraid", "Sahidic"],
["He went to the house", "Sahidic"],
]
# Create Gradio Interface
# `demo` is the Blocks app that the `__main__` guard at the end of the file launches.
# NOTE(review): indentation is not visible in this view; the layout statements
# that follow are presumably nested inside this context — confirm.
with gr.Blocks() as demo:
# Page header: title, links to the two translation models, one-line description.
gr.Markdown("""
# 🔮 Coptic Translation Interface
Translate between Coptic and English using specialized models from [megalaa](https://huggingface.co/megalaa):
- **Coptic → English**: `megalaa/coptic-english-translator`
- **English → Coptic**: `megalaa/english-coptic-translator`
Based on neural machine translation models trained on Coptic-English parallel corpus.
""")
with gr.Tabs():
# Tab 1: Coptic → English
with gr.TabItem("Coptic → English"):
gr.Markdown("### Translate Coptic text to English")
with gr.Row():
with gr.Column(scale=1):
# Source text; the virtual-keyboard buttons below read and rewrite this component.
cop_input = gr.Textbox(
label="Coptic Text",
placeholder="Enter Coptic text or use the virtual keyboard below...",
lines=8,
max_lines=15
)
# Dialect choice is forwarded as the second argument of translate_coptic_to_english.
cop_dialect = gr.Radio(
choices=["Sahidic", "Bohairic"],
value="Sahidic",
label="Coptic Dialect"
)
# Virtual Coptic Keyboard
with gr.Group():
gr.Markdown("**Virtual Coptic Keyboard**")
# Create keyboard in rows of 8
for i in range(0, len(COPTIC_LETTERS), 8):
with gr.Row():
for letter in COPTIC_LETTERS[i:i+8]:
btn = gr.Button(letter, size="sm", scale=1)
# `l=letter` freezes the current letter as a default argument; a plain
# closure over `letter` would see only the last value of the loop variable.
btn.click(
fn=lambda current, l=letter: add_letter(current, l),
inputs=[cop_input],
outputs=[cop_input]
)
# Editing keys: each handler receives the current textbox value and returns the new one.
with gr.Row():
space_btn = gr.Button("Space", size="sm", scale=2)
back_btn = gr.Button("⌫ Backspace", size="sm", scale=2)
clear_btn = gr.Button("Clear", size="sm", scale=1)
space_btn.click(fn=add_space, inputs=[cop_input], outputs=[cop_input])
back_btn.click(fn=backspace, inputs=[cop_input], outputs=[cop_input])
# clear_text takes no arguments, hence no `inputs`.
clear_btn.click(fn=clear_text, outputs=[cop_input])
cop_translate_btn = gr.Button("🔄 Translate to English", variant="primary", size="lg")
with gr.Column(scale=1):
# Read-only box that receives the translation.
cop_output = gr.Textbox(
label="English Translation",
lines=8,
max_lines=15,
interactive=False
)
# Example galleries: six gr.Examples widgets (3 Sahidic, 3 Bohairic), all wired to
# the same inputs/outputs/fn. cache_examples=False means no outputs are precomputed.
# NOTE(review): per the Gradio docs, without run_on_click=True selecting an example
# presumably only fills the inputs and does not call fn — confirm.
with gr.Accordion("📖 Example Texts", open=True):
gr.Markdown("### Sahidic Dialect (Literary Standard)")
gr.Markdown("**Simple Sentences**: Basic grammatical structures")
gr.Examples(
examples=COPTIC_EXAMPLES_SIMPLE,
inputs=[cop_input, cop_dialect],
outputs=cop_output,
fn=translate_coptic_to_english,
cache_examples=False,
label="Sahidic Simple"
)
gr.Markdown("**Complex Sentences**: Multi-clause with subordination")
gr.Examples(
examples=COPTIC_EXAMPLES_COMPLEX,
inputs=[cop_input, cop_dialect],
outputs=cop_output,
fn=translate_coptic_to_english,
cache_examples=False,
label="Sahidic Complex"
)
gr.Markdown("**Full Texts**: Connected discourse (paragraphs)")
gr.Examples(
examples=COPTIC_EXAMPLES_TEXTS,
inputs=[cop_input, cop_dialect],
outputs=cop_output,
fn=translate_coptic_to_english,
cache_examples=False,
label="Sahidic Texts"
)
gr.Markdown("---")
gr.Markdown("### Bohairic Dialect (Northern/Liturgical)")
gr.Markdown("**Simple Sentences**: Basic grammatical structures")
gr.Examples(
examples=BOHAIRIC_EXAMPLES_SIMPLE,
inputs=[cop_input, cop_dialect],
outputs=cop_output,
fn=translate_coptic_to_english,
cache_examples=False,
label="Bohairic Simple"
)
gr.Markdown("**Complex Sentences**: Multi-clause constructions")
gr.Examples(
examples=BOHAIRIC_EXAMPLES_COMPLEX,
inputs=[cop_input, cop_dialect],
outputs=cop_output,
fn=translate_coptic_to_english,
cache_examples=False,
label="Bohairic Complex"
)
gr.Markdown("**Full Texts**: Connected discourse")
gr.Examples(
examples=BOHAIRIC_EXAMPLES_TEXTS,
inputs=[cop_input, cop_dialect],
outputs=cop_output,
fn=translate_coptic_to_english,
cache_examples=False,
label="Bohairic Texts"
)
# Main action: run the translation on the current text and dialect.
cop_translate_btn.click(
fn=translate_coptic_to_english,
inputs=[cop_input, cop_dialect],
outputs=cop_output
)
# Tab 2: English → Coptic
# Same two-column layout as the first tab, without the virtual keyboard.
with gr.TabItem("English → Coptic"):
gr.Markdown("### Translate English text to Coptic")
with gr.Row():
with gr.Column(scale=1):
eng_input = gr.Textbox(
label="English Text",
placeholder="Enter English text...",
lines=8,
max_lines=15
)
# Target dialect is forwarded as the second argument of translate_english_to_coptic.
eng_dialect = gr.Radio(
choices=["Sahidic", "Bohairic"],
value="Sahidic",
label="Target Coptic Dialect"
)
eng_translate_btn = gr.Button("🔄 Translate to Coptic", variant="primary", size="lg")
with gr.Column(scale=1):
# Read-only box that receives the Coptic translation.
eng_output = gr.Textbox(
label="Coptic Translation",
lines=8,
max_lines=15,
interactive=False
)
# cache_examples=False: no outputs are precomputed.
# NOTE(review): without run_on_click=True, selecting an example presumably only
# fills the inputs and does not call fn — confirm against the Gradio docs.
gr.Examples(
examples=ENGLISH_EXAMPLES,
inputs=[eng_input, eng_dialect],
outputs=eng_output,
fn=translate_english_to_coptic,
cache_examples=False,
label="📖 Example English Texts"
)
# Main action: run the translation on the current text and target dialect.
eng_translate_btn.click(
fn=translate_english_to_coptic,
inputs=[eng_input, eng_dialect],
outputs=eng_output
)
# Tab 3: Dependency Parsing (Neural-Symbolic)
# Layout only; the handler (parse_coptic_text) and its click wiring follow this section.
with gr.TabItem("📊 Dependency Analysis"):
gr.Markdown("""
### Neural-Symbolic Coptic Parser
Hybrid architecture combining:
- **Neural**: Stanza + DiaParser for dependency parsing
- **Symbolic**: Prolog rules implementing Walter Till's grammar
- **Lexicon**: Crum's Coptic Dictionary integration
""")
with gr.Row():
with gr.Column(scale=1):
parse_input = gr.Textbox(
label="Coptic Text to Parse",
placeholder="Enter Coptic text for grammatical analysis...",
lines=6,
max_lines=10
)
parse_btn = gr.Button("🔍 Parse & Validate", variant="primary", size="lg")
with gr.Column(scale=1):
# Three output components, filled in this order by the parse handler's 3-tuple:
# main Markdown report, Prolog validation Markdown, CoNLL-U file path.
parse_output = gr.Markdown(
label="Dependency Parse Results",
value="Parse results will appear here..."
)
with gr.Accordion("Prolog Validation Results", open=False):
prolog_output = gr.Markdown(
value="Grammatical validation results will appear here..."
)
with gr.Accordion("Download Options", open=False):
# NOTE(review): created with visible=False and nothing in the visible code sets
# it to visible, so the download link may never be shown — confirm intent.
conllu_download = gr.File(
label="Download CoNLL-U Format",
visible=False
)
# Example galleries with no fn/outputs: they only target parse_input.
with gr.Accordion("📖 Example Texts for Parsing", open=True):
gr.Markdown("**Simple Structures** - Test basic dependency relations")
simple_parse_examples = [
"ⲁⲩⲱ ⲁϥⲙⲟⲩⲧⲉ ⲉⲣⲟϥ", # and he called him
"ⲁⲛⲟⲕ ⲡⲉ ⲡⲛⲟⲩⲧⲉ ⲙⲡⲉⲕⲉⲓⲱⲧ", # Tripartite nominal
"ⲡϫⲟⲉⲓⲥ ⲡⲉ ⲡⲁⲛⲟⲩⲧⲉ", # The Lord is my God
]
# gr.Examples takes one list per example row, so each string is wrapped in a list.
gr.Examples(
examples=[[ex] for ex in simple_parse_examples],
inputs=parse_input,
label="Simple"
)
gr.Markdown("**Complex Structures** - Test subordination and coordination")
complex_parse_examples = [
"ⲁⲩⲱ ⲛⲧⲉⲣⲉϥⲛⲁⲩ ⲉⲡⲙⲏⲏϣⲉ ⲁϥϣⲡϩⲧⲏϥ ⲉϩⲣⲁⲓ ⲉϫⲱⲟⲩ", # Temporal clause
"ⲁⲓⲉⲓ ⲅⲁⲣ ⲉⲙⲟⲩⲧⲉ ⲁⲛ ⲉⲛⲇⲓⲕⲁⲓⲟⲥ ⲁⲗⲗⲁ ⲛⲣⲉϥⲣⲛⲟⲃⲉ", # Purpose with negation
]
gr.Examples(
examples=[[ex] for ex in complex_parse_examples],
inputs=parse_input,
label="Complex"
)
gr.Markdown("**Full Texts** - Test discourse-level parsing")
text_parse_examples = [
"ⲛⲉⲩⲛⲟⲩⲙⲏⲏϣⲉ ⲇⲉ ⲛϣⲱⲛⲉ ⲉⲩⲛⲕⲟⲧⲕ ϩⲙ ⲡⲙⲁ ⲉⲧⲙⲙⲁⲩ· ⲛϩⲁⲛⲃⲗⲗⲉ ⲙⲛ ⲛϩⲁⲛϭⲁⲗⲉ ⲙⲛ ⲛϣⲟⲩⲱⲟⲩ·",
]
gr.Examples(
examples=[[ex] for ex in text_parse_examples],
inputs=parse_input,
label="Texts"
)
def _format_prolog_validation(result):
    """Render the optional ``prolog_validation`` entry of a parse result as Markdown."""
    if 'prolog_validation' not in result or not result['prolog_validation']:
        return "ℹ️ Prolog validation not available (requires SWI-Prolog)"
    validation = result['prolog_validation']
    patterns = validation.get('patterns_detected')
    warning_list = validation.get('warnings')
    parts = ["## 🔍 Prolog Validation (Walter Till Grammar)\n\n"]
    if patterns:
        parts.append("### ✅ Detected Grammatical Patterns\n\n")
        for pattern in patterns:
            # Only dict patterns flagged as tripartite get the detailed
            # rendering; everything else is printed as-is.
            if isinstance(pattern, dict) and pattern.get('is_tripartite'):
                parts.append(f"- **Tripartite Sentence**: {pattern.get('description', '')}\n")
                parts.append(f" ```\n {pattern.get('pattern', '')}\n ```\n")
            else:
                parts.append(f"- {pattern}\n")
    if warning_list:
        parts.append("\n### ⚠️ Grammatical Warnings\n\n")
        parts.extend(f"- {warning}\n" for warning in warning_list)
    if not warning_list and not patterns:
        parts.append("✓ No grammatical issues detected\n")
    return "".join(parts)


def parse_coptic_text(text):
    """Parse Coptic text with neural-symbolic validation.

    Returns a 3-tuple ``(main_markdown, prolog_markdown, conllu_path)``.
    ``conllu_path`` is the path of a freshly written CoNLL-U file, or
    ``None`` when the input is blank, parsing yields nothing, or an error
    occurs. Errors are never raised; they are reported in the first element.
    """
    if not text or not text.strip():
        return "Please enter Coptic text to parse.", "", None
    try:
        import tempfile

        from coptic_parser_core import CopticParserCore

        # NOTE(review): a new parser object is created and loaded on every
        # call; any caching would have to live inside CopticParserCore —
        # confirm.
        parser = CopticParserCore()
        parser.load_parser()
        result = parser.parse_text(text)
        if not result:
            return "❌ Parsing failed. Please check input.", "", None
        main_output = (
            "\n## Parse Results\n"
            f"**Total Sentences**: {result['total_sentences']}\n"
            f"**Total Tokens**: {result['total_tokens']}\n"
            "### Dependency Structure\n"
            f"{parser.format_table(result)}\n"
        )
        prolog_output_text = _format_prolog_validation(result)
        conllu_content = parser.format_conllu(result)
        # Write each export to its own temp file. A single fixed path would
        # let concurrent requests overwrite each other's download, and a
        # hard-coded /tmp is not portable. delete=False keeps the file on
        # disk after the handle closes so it can be served; files are not
        # cleaned up here.
        with tempfile.NamedTemporaryFile(
            mode='w',
            encoding='utf-8',
            prefix='coptic_parse_',
            suffix='.conllu',
            delete=False,
        ) as f:
            f.write(conllu_content)
            conllu_path = f.name
        return main_output, prolog_output_text, conllu_path
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"❌ Error: {str(e)}", "", None
# parse_coptic_text returns a 3-tuple; its elements map positionally onto
# these three output components.
parse_btn.click(
fn=parse_coptic_text,
inputs=parse_input,
outputs=[parse_output, prolog_output, conllu_download]
)
# Page footer: credits for the models and a summary of the parser components.
gr.Markdown("""
---
### About This Research Interface
**Translation Models**:
- [megalaa/coptic-english-translator](https://huggingface.co/megalaa/coptic-english-translator) & [megalaa/english-coptic-translator](https://huggingface.co/megalaa/english-coptic-translator)
- Based on work by Enis & Megalaa (2024)
**Dependency Parser** (Neural-Symbolic Hybrid):
- **Neural**: Stanza NLP pipeline + DiaParser for Coptic
- **Symbolic**: Prolog implementation of Walter Till's Coptic grammar
- **Lexicon**: Integration with Crum's Coptic Dictionary
- **Error Detection**: Prolog validation catches neural parser hallucinations
**Research Features**:
- CoNLL-U format export for corpus analysis
- Grammatical pattern detection (tripartite sentences, etc.)
- Dialect-aware processing (Sahidic/Bohairic)
""")
if __name__ == "__main__":
    # Launch settings: bind to all interfaces on port 7860 with show_error enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)