| |
| """ |
| Coptic Translation Interface - Hugging Face Space |
| Supports Coptic↔English translation using megalaa models |
| """ |
|
|
| import gradio as gr |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
| import torch |
|
|
| |
# Letters offered by the virtual keyboard, in display order. Every entry is a
# single code point, so splitting the string yields one list item per letter.
COPTIC_LETTERS = list(
    "ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙ"
    "ⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱ"
    "ϣϥϧϩϫϭϯ"
)
|
|
| |
# Coptic -> transcription table. The two strings are aligned position by
# position (one code point each); "ϯ" is added separately because it is the
# only letter that maps to a two-character transcription.
COPTIC_TO_GREEK = dict(zip(
    "ⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱϣϥϧϩϫϭ",
    "αβγδεϛζηθικλμνξοπρστυφχψωʃfxhɟc",
))
COPTIC_TO_GREEK["ϯ"] = "ti"

# Inverse table (transcription -> Coptic), derived from the forward table.
GREEK_TO_COPTIC = {greek: coptic for coptic, greek in COPTIC_TO_GREEK.items()}
|
|
def greekify(coptic_text):
    """Transliterate Coptic Unicode text into the Greek-based transcription.

    Each character is lowercased and looked up in ``COPTIC_TO_GREEK``;
    characters without an entry are kept in their lowercased form.
    """
    # Lowercase one character at a time (not the whole string) so the result
    # never depends on neighbouring characters.
    return "".join(
        COPTIC_TO_GREEK.get(lowered, lowered)
        for lowered in map(str.lower, coptic_text)
    )
|
|
def degreekify(greek_text):
    """Convert Greek-based transcription back to Coptic Unicode.

    The text is scanned left to right. The two-character sequence ``ti``
    (matched case-insensitively) becomes the single letter mapped from
    ``"ti"`` in ``GREEK_TO_COPTIC``. Every other character is looked up on
    its own, also case-insensitively, so uppercase transcription letters are
    converted the same way as their lowercase forms. Characters with no table
    entry are copied through unchanged.

    Args:
        greek_text: Transcribed text, e.g. decoded model output.

    Returns:
        The text with every mappable unit replaced by its Coptic letter.
    """
    coptic_chars = []
    pos = 0
    length = len(greek_text)
    while pos < length:
        # Slicing never raises at the end of the string, so no separate
        # bounds check is needed before testing for the digraph.
        pair = greek_text[pos:pos + 2]
        if pair.lower() == "ti":
            coptic_chars.append(GREEK_TO_COPTIC.get("ti", pair))
            pos += 2
            continue

        char = greek_text[pos]
        # All table keys are lowercase; fold case for the lookup only and
        # keep the original character when there is no mapping.
        coptic_chars.append(GREEK_TO_COPTIC.get(char.lower(), char))
        pos += 1
    return "".join(coptic_chars)
|
|
| |
# Caches for the (tokenizer, model) pairs; filled in by the loader functions
# the first time each translation direction is requested.
coptic_to_english_model = english_to_coptic_model = None

# Run on the GPU when torch reports CUDA support, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
|
|
def load_coptic_to_english():
    """Return the Coptic → English ``(tokenizer, model)`` pair.

    The pair is created on the first call, stored in the module-level
    ``coptic_to_english_model`` and returned as-is on later calls. The model
    is moved to ``device`` when it is loaded.
    """
    global coptic_to_english_model
    if coptic_to_english_model is not None:
        return coptic_to_english_model

    repo_id = "megalaa/coptic-english-translator"
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(repo_id).to(device)
    coptic_to_english_model = (tokenizer, model)
    return coptic_to_english_model
|
|
def load_english_to_coptic():
    """Return the English → Coptic ``(tokenizer, model)`` pair.

    The pair is created on the first call, stored in the module-level
    ``english_to_coptic_model`` and returned as-is on later calls. The model
    is moved to ``device`` when it is loaded.
    """
    global english_to_coptic_model
    if english_to_coptic_model is not None:
        return english_to_coptic_model

    repo_id = "megalaa/english-coptic-translator"
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(repo_id).to(device)
    english_to_coptic_model = (tokenizer, model)
    return english_to_coptic_model
|
|
def translate_coptic_to_english(text, dialect):
    """Translate Coptic text to English.

    Args:
        text: Coptic Unicode input. Empty, whitespace-only or ``None`` input
            yields a prompt message instead of a translation.
        dialect: ``"Bohairic"`` prefixes the model input with the tag ``б``;
            any other value prefixes it with ``з``.

    Returns:
        The decoded translation, or a ``"Translation error: ..."`` message
        if anything raised while loading the model or generating.
    """
    if not (text and text.strip()):
        return "Please enter Coptic text to translate."

    try:
        tokenizer, model = load_coptic_to_english()

        # The model input is the transcription, prefixed by a dialect tag
        # and a single space.
        transcribed = greekify(text)
        dialect_tag = "б" if dialect == "Bohairic" else "з"
        model_input = f"{dialect_tag} {transcribed}"

        encoded = tokenizer(model_input, return_tensors="pt", padding=True).to(device)
        generated = model.generate(
            **encoded,
            max_new_tokens=128,
            num_beams=5,
            early_stopping=True,
        )
        return tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as exc:
        # UI boundary: report the failure as text rather than raising.
        return f"Translation error: {exc}"
|
|
def translate_english_to_coptic(text, dialect):
    """Translate English text to Coptic.

    Args:
        text: English input. Empty, whitespace-only or ``None`` input yields
            a prompt message instead of a translation.
        dialect: ``"Bohairic"`` prefixes the model input with the tag ``б``;
            any other value prefixes it with ``з``.

    Returns:
        The decoded model output converted with ``degreekify``, or a
        ``"Translation error: ..."`` message if anything raised while
        loading the model or generating.
    """
    if not (text and text.strip()):
        return "Please enter English text to translate."

    try:
        tokenizer, model = load_english_to_coptic()

        # Dialect tag, a single space, then the untouched English text.
        dialect_tag = "б" if dialect == "Bohairic" else "з"
        model_input = f"{dialect_tag} {text}"

        encoded = tokenizer(model_input, return_tensors="pt", padding=True).to(device)
        generated = model.generate(
            **encoded,
            max_new_tokens=128,
            num_beams=5,
            early_stopping=True,
        )

        # The decoded output is transcription; map it back to Coptic letters.
        transcribed = tokenizer.decode(generated[0], skip_special_tokens=True)
        return degreekify(transcribed)
    except Exception as exc:
        # UI boundary: report the failure as text rather than raising.
        return f"Translation error: {exc}"
|
|
def add_letter(current_text, letter):
    """Return ``current_text`` with ``letter`` appended.

    An empty or ``None`` ``current_text`` is treated as an empty string, so
    the result is just ``letter``.
    """
    existing = current_text or ""
    return existing + letter
|
|
def add_space(current_text):
    """Return ``current_text`` with one space appended.

    An empty or ``None`` ``current_text`` yields a single space.
    """
    existing = current_text or ""
    return existing + " "
|
|
def backspace(current_text):
    """Return ``current_text`` without its last character.

    An empty or ``None`` ``current_text`` yields an empty string.
    """
    existing = current_text or ""
    return existing[:-1]
|
|
def clear_text():
    """Return the empty string used to reset a text field."""
    cleared = ""
    return cleared
|
|
| |
| import json |
| from pathlib import Path |
|
|
def load_test_corpus():
    """Load ``coptic_test_corpus.json`` from the directory of this file.

    Returns:
        The parsed JSON content, or ``None`` when the file does not exist.
    """
    corpus_file = Path(__file__).parent / "coptic_test_corpus.json"
    if not corpus_file.exists():
        return None

    with corpus_file.open('r', encoding='utf-8') as handle:
        return json.load(handle)
|
|
| |
| |
# Example rows for the UI. Every row is ``[text, dialect]``; the dialect is
# attached by the comprehension so each list only spells out its sentences.

# --- Sahidic examples (Coptic -> English tab) ---
COPTIC_EXAMPLES_SIMPLE = [
    [sentence, "Sahidic"]
    for sentence in (
        "ⲁⲩⲱ ⲁϥⲙⲟⲩⲧⲉ ⲉⲣⲟϥ",
        "ⲁⲛⲟⲕ ⲡⲉ ⲡⲛⲟⲩⲧⲉ ⲙⲡⲉⲕⲉⲓⲱⲧ",
        "ⲙⲡⲣⲣ ϩⲟⲧⲉ",
        "ⲡϫⲟⲉⲓⲥ ⲡⲉ ⲡⲁⲛⲟⲩⲧⲉ",
        "ⲁϥⲃⲱⲕ ⲉϩⲣⲁⲓ ⲉⲡⲉⲣⲡⲉ",
    )
]

COPTIC_EXAMPLES_COMPLEX = [
    [sentence, "Sahidic"]
    for sentence in (
        "ⲁⲩⲱ ⲛⲧⲉⲣⲉϥⲛⲁⲩ ⲉⲡⲙⲏⲏϣⲉ ⲁϥϣⲡϩⲧⲏϥ ⲉϩⲣⲁⲓ ⲉϫⲱⲟⲩ",
        "ⲉϣⲱⲡⲉ ⲇⲉ ⲁⲩⲛⲁⲩ ⲉⲣⲟϥ ⲉϥⲙⲟⲟϣⲉ ϩⲓϫⲛ ⲧⲉⲑⲁⲗⲁⲥⲥⲁ ⲁⲩϣⲧⲟⲣⲧⲣ",
        "ⲁⲓⲉⲓ ⲅⲁⲣ ⲉⲙⲟⲩⲧⲉ ⲁⲛ ⲉⲛⲇⲓⲕⲁⲓⲟⲥ ⲁⲗⲗⲁ ⲛⲣⲉϥⲣⲛⲟⲃⲉ",
    )
]

COPTIC_EXAMPLES_TEXTS = [
    [passage, "Sahidic"]
    for passage in (
        "ⲛⲉⲩⲛⲟⲩⲙⲏⲏϣⲉ ⲇⲉ ⲛϣⲱⲛⲉ ⲉⲩⲛⲕⲟⲧⲕ ϩⲙ ⲡⲙⲁ ⲉⲧⲙⲙⲁⲩ· ⲛϩⲁⲛⲃⲗⲗⲉ ⲙⲛ ⲛϩⲁⲛϭⲁⲗⲉ ⲙⲛ ⲛϣⲟⲩⲱⲟⲩ·",
        "ⲉⲓⲥ ⲡⲉⲧϫⲟ ⲁϥⲉⲓ ⲉⲃⲟⲗ ⲉϫⲟ· ⲁⲩⲱ ⲛⲧⲉⲣⲉϥϫⲟ ϩⲟⲓⲛⲉ ⲙⲉⲛ ⲁⲩϩⲉ ϩⲁⲧⲏ ⲧⲉϩⲓⲏ·",
    )
]

# --- Bohairic examples (Coptic -> English tab) ---
BOHAIRIC_EXAMPLES_SIMPLE = [
    [sentence, "Bohairic"]
    for sentence in (
        "ⲟⲩⲟϩ ⲁϥⲙⲟⲩϯ ⲉⲣⲟϥ",
        "ⲁⲛⲟⲕ ⲡⲉ ⲫϯ ⲛⲧⲉ ⲡⲉⲕⲓⲱⲧ",
        "ⲙⲡⲉⲣⲉⲣϩⲟϯ",
        "ⲡϭⲟⲓⲥ ⲡⲉ ⲡⲁⲛⲟⲩϯ",
        "ⲁϥϣⲉⲛⲁϥ ⲉⲡϣⲱⲓ ⲉⲡⲓⲉⲣⲫⲉⲓ",
    )
]

BOHAIRIC_EXAMPLES_COMPLEX = [
    [sentence, "Bohairic"]
    for sentence in (
        "ⲟⲩⲟϩ ⲉⲧⲁϥⲛⲁⲩ ⲉⲡⲓⲙⲏϣ ⲁϥϣⲉⲛϩⲏⲧ ϧⲁⲣⲱⲟⲩ",
        "ⲡϭⲟⲓⲥ ⲡⲉⲧⲁⲙⲟⲛⲓ",
    )
]

BOHAIRIC_EXAMPLES_TEXTS = [
    [passage, "Bohairic"]
    for passage in (
        "ⲛⲉ ⲟⲩⲟⲛ ⲟⲩⲙⲏϣ ⲛϣⲱⲛⲓ ⲉⲩⲉⲛⲕⲟⲧ ϧⲉⲛ ⲡⲓⲙⲁ ⲉⲧⲉⲙⲙⲁⲩ· ϩⲁⲛⲃⲉⲗⲗⲉⲩ ⲛⲉⲙ ϩⲁⲛϭⲁⲗⲉⲩ ⲛⲉⲙ ϩⲁⲛϣⲁⲩⲟⲩⲱⲟⲩ·",
    )
]

# --- English examples (English -> Coptic tab), all targeting Sahidic ---
ENGLISH_EXAMPLES = [
    [sentence, "Sahidic"]
    for sentence in (
        "The Lord is good",
        "I am a teacher",
        "We give thanks to God",
        "Do not be afraid",
        "He went to the house",
    )
]
|
|
| |
# Gradio UI definition.
# NOTE(review): the source this was rebuilt from had lost its indentation, so
# the container nesting below (rows / columns / groups / accordions) is
# reconstructed from widget order — confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🔮 Coptic Translation Interface

    Translate between Coptic and English using specialized models from [megalaa](https://huggingface.co/megalaa):
    - **Coptic → English**: `megalaa/coptic-english-translator`
    - **English → Coptic**: `megalaa/english-coptic-translator`

    Based on neural machine translation models trained on Coptic-English parallel corpus.
    """)

    with gr.Tabs():
        # ------------------------------------------------------------------
        # Tab 1: Coptic -> English
        # ------------------------------------------------------------------
        with gr.TabItem("Coptic → English"):
            gr.Markdown("### Translate Coptic text to English")

            with gr.Row():
                with gr.Column(scale=1):
                    cop_input = gr.Textbox(
                        label="Coptic Text",
                        placeholder="Enter Coptic text or use the virtual keyboard below...",
                        lines=8,
                        max_lines=15,
                    )

                    cop_dialect = gr.Radio(
                        choices=["Sahidic", "Bohairic"],
                        value="Sahidic",
                        label="Coptic Dialect",
                    )

                    with gr.Group():
                        gr.Markdown("**Virtual Coptic Keyboard**")

                        # Lay the letters out eight buttons per row.
                        for row_start in range(0, len(COPTIC_LETTERS), 8):
                            with gr.Row():
                                for letter in COPTIC_LETTERS[row_start:row_start + 8]:
                                    key_btn = gr.Button(letter, size="sm", scale=1)
                                    # Bind the letter as a default argument so
                                    # each button keeps its own letter instead
                                    # of the loop's final value.
                                    key_btn.click(
                                        fn=lambda current, key=letter: add_letter(current, key),
                                        inputs=[cop_input],
                                        outputs=[cop_input],
                                    )

                        with gr.Row():
                            space_btn = gr.Button("Space", size="sm", scale=2)
                            back_btn = gr.Button("⌫ Backspace", size="sm", scale=2)
                            clear_btn = gr.Button("Clear", size="sm", scale=1)

                        space_btn.click(fn=add_space, inputs=[cop_input], outputs=[cop_input])
                        back_btn.click(fn=backspace, inputs=[cop_input], outputs=[cop_input])
                        clear_btn.click(fn=clear_text, outputs=[cop_input])

                    cop_translate_btn = gr.Button(
                        "🔄 Translate to English", variant="primary", size="lg"
                    )

                with gr.Column(scale=1):
                    cop_output = gr.Textbox(
                        label="English Translation",
                        lines=8,
                        max_lines=15,
                        interactive=False,
                    )

            with gr.Accordion("📖 Example Texts", open=True):
                # (section heading, ((blurb, example rows, widget label), ...))
                dialect_sections = (
                    (
                        "### Sahidic Dialect (Literary Standard)",
                        (
                            ("**Simple Sentences**: Basic grammatical structures",
                             COPTIC_EXAMPLES_SIMPLE, "Sahidic Simple"),
                            ("**Complex Sentences**: Multi-clause with subordination",
                             COPTIC_EXAMPLES_COMPLEX, "Sahidic Complex"),
                            ("**Full Texts**: Connected discourse (paragraphs)",
                             COPTIC_EXAMPLES_TEXTS, "Sahidic Texts"),
                        ),
                    ),
                    (
                        "### Bohairic Dialect (Northern/Liturgical)",
                        (
                            ("**Simple Sentences**: Basic grammatical structures",
                             BOHAIRIC_EXAMPLES_SIMPLE, "Bohairic Simple"),
                            ("**Complex Sentences**: Multi-clause constructions",
                             BOHAIRIC_EXAMPLES_COMPLEX, "Bohairic Complex"),
                            ("**Full Texts**: Connected discourse",
                             BOHAIRIC_EXAMPLES_TEXTS, "Bohairic Texts"),
                        ),
                    ),
                )
                for section_index, (heading, example_groups) in enumerate(dialect_sections):
                    if section_index:
                        # Horizontal rule between dialect sections.
                        gr.Markdown("---")
                    gr.Markdown(heading)
                    for blurb, example_rows, examples_label in example_groups:
                        gr.Markdown(blurb)
                        gr.Examples(
                            examples=example_rows,
                            inputs=[cop_input, cop_dialect],
                            outputs=cop_output,
                            fn=translate_coptic_to_english,
                            cache_examples=False,
                            label=examples_label,
                        )

            cop_translate_btn.click(
                fn=translate_coptic_to_english,
                inputs=[cop_input, cop_dialect],
                outputs=cop_output,
            )

        # ------------------------------------------------------------------
        # Tab 2: English -> Coptic
        # ------------------------------------------------------------------
        with gr.TabItem("English → Coptic"):
            gr.Markdown("### Translate English text to Coptic")

            with gr.Row():
                with gr.Column(scale=1):
                    eng_input = gr.Textbox(
                        label="English Text",
                        placeholder="Enter English text...",
                        lines=8,
                        max_lines=15,
                    )

                    eng_dialect = gr.Radio(
                        choices=["Sahidic", "Bohairic"],
                        value="Sahidic",
                        label="Target Coptic Dialect",
                    )

                    eng_translate_btn = gr.Button(
                        "🔄 Translate to Coptic", variant="primary", size="lg"
                    )

                with gr.Column(scale=1):
                    eng_output = gr.Textbox(
                        label="Coptic Translation",
                        lines=8,
                        max_lines=15,
                        interactive=False,
                    )

            gr.Examples(
                examples=ENGLISH_EXAMPLES,
                inputs=[eng_input, eng_dialect],
                outputs=eng_output,
                fn=translate_english_to_coptic,
                cache_examples=False,
                label="📖 Example English Texts",
            )

            eng_translate_btn.click(
                fn=translate_english_to_coptic,
                inputs=[eng_input, eng_dialect],
                outputs=eng_output,
            )

        # ------------------------------------------------------------------
        # Tab 3: dependency analysis
        # ------------------------------------------------------------------
        with gr.TabItem("📊 Dependency Analysis"):
            gr.Markdown("""
            ### Neural-Symbolic Coptic Parser

            Hybrid architecture combining:
            - **Neural**: Stanza + DiaParser for dependency parsing
            - **Symbolic**: Prolog rules implementing Walter Till's grammar
            - **Lexicon**: Crum's Coptic Dictionary integration
            """)

            with gr.Row():
                with gr.Column(scale=1):
                    parse_input = gr.Textbox(
                        label="Coptic Text to Parse",
                        placeholder="Enter Coptic text for grammatical analysis...",
                        lines=6,
                        max_lines=10,
                    )

                    parse_btn = gr.Button("🔍 Parse & Validate", variant="primary", size="lg")

                with gr.Column(scale=1):
                    parse_output = gr.Markdown(
                        label="Dependency Parse Results",
                        value="Parse results will appear here...",
                    )

                    with gr.Accordion("Prolog Validation Results", open=False):
                        prolog_output = gr.Markdown(
                            value="Grammatical validation results will appear here..."
                        )

                    with gr.Accordion("Download Options", open=False):
                        conllu_download = gr.File(
                            label="Download CoNLL-U Format",
                            visible=False,
                        )

            with gr.Accordion("📖 Example Texts for Parsing", open=True):
                simple_parse_examples = [
                    "ⲁⲩⲱ ⲁϥⲙⲟⲩⲧⲉ ⲉⲣⲟϥ",
                    "ⲁⲛⲟⲕ ⲡⲉ ⲡⲛⲟⲩⲧⲉ ⲙⲡⲉⲕⲉⲓⲱⲧ",
                    "ⲡϫⲟⲉⲓⲥ ⲡⲉ ⲡⲁⲛⲟⲩⲧⲉ",
                ]
                complex_parse_examples = [
                    "ⲁⲩⲱ ⲛⲧⲉⲣⲉϥⲛⲁⲩ ⲉⲡⲙⲏⲏϣⲉ ⲁϥϣⲡϩⲧⲏϥ ⲉϩⲣⲁⲓ ⲉϫⲱⲟⲩ",
                    "ⲁⲓⲉⲓ ⲅⲁⲣ ⲉⲙⲟⲩⲧⲉ ⲁⲛ ⲉⲛⲇⲓⲕⲁⲓⲟⲥ ⲁⲗⲗⲁ ⲛⲣⲉϥⲣⲛⲟⲃⲉ",
                ]
                text_parse_examples = [
                    "ⲛⲉⲩⲛⲟⲩⲙⲏⲏϣⲉ ⲇⲉ ⲛϣⲱⲛⲉ ⲉⲩⲛⲕⲟⲧⲕ ϩⲙ ⲡⲙⲁ ⲉⲧⲙⲙⲁⲩ· ⲛϩⲁⲛⲃⲗⲗⲉ ⲙⲛ ⲛϩⲁⲛϭⲁⲗⲉ ⲙⲛ ⲛϣⲟⲩⲱⲟⲩ·",
                ]

                # (blurb, sentences, widget label) — these examples only fill
                # the input box; no handler is attached to them.
                parse_example_groups = (
                    ("**Simple Structures** - Test basic dependency relations",
                     simple_parse_examples, "Simple"),
                    ("**Complex Structures** - Test subordination and coordination",
                     complex_parse_examples, "Complex"),
                    ("**Full Texts** - Test discourse-level parsing",
                     text_parse_examples, "Texts"),
                )
                for blurb, sentences, examples_label in parse_example_groups:
                    gr.Markdown(blurb)
                    gr.Examples(
                        examples=[[sentence] for sentence in sentences],
                        inputs=parse_input,
                        label=examples_label,
                    )

            def _format_prolog_validation(validation):
                """Render the ``prolog_validation`` mapping as a Markdown string."""
                detected = validation.get('patterns_detected')
                warning_items = validation.get('warnings')

                parts = ["## 🔍 Prolog Validation (Walter Till Grammar)\n\n"]

                if detected:
                    parts.append("### ✅ Detected Grammatical Patterns\n\n")
                    for pattern in detected:
                        if isinstance(pattern, dict) and pattern.get('is_tripartite'):
                            parts.append(
                                f"- **Tripartite Sentence**: {pattern.get('description', '')}\n"
                            )
                            parts.append(f" ```\n {pattern.get('pattern', '')}\n ```\n")
                        else:
                            # Non-tripartite dicts and plain values are
                            # listed using their string form.
                            parts.append(f"- {pattern}\n")

                if warning_items:
                    parts.append("\n### ⚠️ Grammatical Warnings\n\n")
                    parts.extend(f"- {warning}\n" for warning in warning_items)

                if not warning_items and not detected:
                    parts.append("✓ No grammatical issues detected\n")

                return "".join(parts)

            def parse_coptic_text(text):
                """Parse Coptic text and report the result for the three outputs.

                Args:
                    text: Coptic text from the parse textbox.

                Returns:
                    A 3-tuple ``(parse_markdown, validation_markdown,
                    conllu_path)``. On empty input, a failed parse or any
                    exception, the first item is a message, the second is
                    ``""`` and the third is ``None``.
                """
                # Function-scope import, like the parser import below.
                import tempfile

                if not text or not text.strip():
                    return "Please enter Coptic text to parse.", "", None

                try:
                    # Imported lazily so the app can start (and both
                    # translation tabs work) without the parser package.
                    from coptic_parser_core import CopticParserCore

                    parser = CopticParserCore()
                    parser.load_parser()

                    result = parser.parse_text(text)
                    if not result:
                        return "❌ Parsing failed. Please check input.", "", None

                    # Built line by line (flush-left) so the inserted table
                    # cannot pick up stray source indentation.
                    main_output = "\n".join([
                        "",
                        "## Parse Results",
                        "",
                        f"**Total Sentences**: {result['total_sentences']}",
                        f"**Total Tokens**: {result['total_tokens']}",
                        "",
                        "### Dependency Structure",
                        "",
                        f"{parser.format_table(result)}",
                        "",
                    ])

                    if 'prolog_validation' in result and result['prolog_validation']:
                        prolog_output_text = _format_prolog_validation(result['prolog_validation'])
                    else:
                        prolog_output_text = "ℹ️ Prolog validation not available (requires SWI-Prolog)"

                    # Write each export to its own temp file: a single fixed
                    # path would let concurrent requests overwrite each
                    # other's download. delete=False keeps the file on disk
                    # after the handle closes so it can still be served.
                    conllu_content = parser.format_conllu(result)
                    with tempfile.NamedTemporaryFile(
                        mode='w',
                        encoding='utf-8',
                        prefix="coptic_parse_",
                        suffix=".conllu",
                        delete=False,
                    ) as handle:
                        handle.write(conllu_content)
                        conllu_path = handle.name

                    # NOTE(review): the gr.File component bound to this value
                    # is created with visible=False and nothing in this file
                    # reveals it — confirm the download is reachable.
                    return main_output, prolog_output_text, conllu_path

                except Exception as exc:
                    # UI boundary: show the failure instead of raising.
                    return f"❌ Error: {exc}", "", None

            parse_btn.click(
                fn=parse_coptic_text,
                inputs=parse_input,
                outputs=[parse_output, prolog_output, conllu_download],
            )

    gr.Markdown("""
    ---
    ### About This Research Interface

    **Translation Models**:
    - [megalaa/coptic-english-translator](https://huggingface.co/megalaa/coptic-english-translator) & [megalaa/english-coptic-translator](https://huggingface.co/megalaa/english-coptic-translator)
    - Based on work by Enis & Megalaa (2024)

    **Dependency Parser** (Neural-Symbolic Hybrid):
    - **Neural**: Stanza NLP pipeline + DiaParser for Coptic
    - **Symbolic**: Prolog implementation of Walter Till's Coptic grammar
    - **Lexicon**: Integration with Crum's Coptic Dictionary
    - **Error Detection**: Prolog validation catches neural parser hallucinations

    **Research Features**:
    - CoNLL-U format export for corpus analysis
    - Grammatical pattern detection (tripartite sentences, etc.)
    - Dialect-aware processing (Sahidic/Bohairic)
    """)
|
|
if __name__ == "__main__":
    # Launch settings gathered in one place: bind address, port, and the
    # show_error flag passed through to Gradio.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)
|
|