Spaces:

Norelad
/

coptic-translation-interface

Sleeping

Rogaton Claude committed on Apr 9

Commit

38ecdf5

1 Parent(s): 6d0a56b

Create Gradio-based Coptic translation interface for HF Space

- Replace training app with bidirectional translation interface
- Support Coptic→English and English→Coptic translation
- Use Norelad/coptic-megalaa-finetuned and megalaa/english-coptic-translator
- Update requirements.txt for Gradio deployment
- Remove old training space files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (4) hide show

app.py +215 -0
hf_space_megalaa_training/app.py +0 -470
hf_space_megalaa_training/requirements.txt +0 -11
requirements.txt +1 -5

app.py ADDED Viewed

	@@ -0,0 +1,215 @@

+#!/usr/bin/env python3
+"""
+Coptic Translation Interface - Hugging Face Space
+Supports Coptic↔English translation using fine-tuned MEGALAA models
+"""
+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Coptic-Greek character mappings for model preprocessing.
# Every Coptic letter is paired with the Greek (or Latin/IPA) stand-in that is
# handed to the translation models in its place.
# NOTE(review): the fine-tuning script removed in this same commit
# (hf_space_megalaa_training/app.py) transliterated ϣ→"s", ϧ→"k", ϫ→"j" and
# ϯ→"t", while this table uses "ʃ", "x", "ɟ" and "ti" — confirm which scheme
# the deployed checkpoints were actually trained with.
_LOWERCASE_COPTIC_TO_GREEK = {
    "ⲁ": "α", "ⲃ": "β", "ⲅ": "γ", "ⲇ": "δ", "ⲉ": "ε", "ⲋ": "ϛ",
    "ⲍ": "ζ", "ⲏ": "η", "ⲑ": "θ", "ⲓ": "ι", "ⲕ": "κ", "ⲗ": "λ",
    "ⲙ": "μ", "ⲛ": "ν", "ⲝ": "ξ", "ⲟ": "ο", "ⲡ": "π", "ⲣ": "ρ",
    "ⲥ": "σ", "ⲧ": "τ", "ⲩ": "υ", "ⲫ": "φ", "ⲭ": "χ", "ⲯ": "ψ", "ⲱ": "ω",
    "ϣ": "ʃ", "ϥ": "f", "ϧ": "x", "ϩ": "h", "ϫ": "ɟ", "ϭ": "c", "ϯ": "ti",
}
_UPPERCASE_COPTIC_TO_GREEK = {
    "Ⲁ": "Α", "Ⲃ": "Β", "Ⲅ": "Γ", "Ⲇ": "Δ", "Ⲉ": "Ε", "Ⲍ": "Ζ", "Ⲏ": "Η",
    "Ⲑ": "Θ", "Ⲓ": "Ι", "Ⲕ": "Κ", "Ⲗ": "Λ", "Ⲙ": "Μ", "Ⲛ": "Ν", "Ⲝ": "Ξ",
    "Ⲟ": "Ο", "Ⲡ": "Π", "Ⲣ": "Ρ", "Ⲥ": "Σ", "Ⲧ": "Τ", "Ⲩ": "Υ", "Ⲫ": "Φ",
    "Ⲭ": "Χ", "Ⲯ": "Ψ", "Ⲱ": "Ω", "Ϣ": "Ʃ", "Ϥ": "F", "Ϧ": "X", "Ϩ": "H",
    "Ϫ": "Ɉ", "Ϭ": "C", "Ϯ": "TI",
}
# Lowercase entries come first, then uppercase, in the same order as before.
COPTIC_TO_GREEK = {**_LOWERCASE_COPTIC_TO_GREEK, **_UPPERCASE_COPTIC_TO_GREEK}
# Reverse lookup (stand-in → Coptic letter) used to turn model output back
# into Coptic script.
GREEK_TO_COPTIC = {greek: coptic for coptic, greek in COPTIC_TO_GREEK.items()}
def greekify(coptic_text):
    """Convert Coptic Unicode to the Greek transcription the models consume.

    Each character is lowercased on its own and then looked up in
    ``COPTIC_TO_GREEK``; characters without an entry (spaces, punctuation,
    Latin letters, ...) pass through in lowercase.

    Args:
        coptic_text: Coptic text as a string. ``None`` or an empty string is
            accepted and yields ``""``, matching the guard the fine-tuning
            code used for missing text.

    Returns:
        The lowercase transliterated string.
    """
    if not coptic_text:
        return ""
    # Lowercase character by character (not the whole string at once) so the
    # result is independent of any context-sensitive case rules.
    lowered_chars = (ch.lower() for ch in coptic_text)
    return "".join(COPTIC_TO_GREEK.get(ch, ch) for ch in lowered_chars)
def degreekify(greek_text):
    """Convert Greek transcription back to Coptic Unicode.

    The text is scanned left to right. The two-letter sequence "ti" (in any
    letter case) is consumed as one unit because it stands for the single
    Coptic letter ϯ; every other character is mapped on its own through
    ``GREEK_TO_COPTIC`` and left unchanged when it has no entry.

    Args:
        greek_text: Transliterated text, typically decoded model output.
            ``None`` or an empty string yields ``""``.

    Returns:
        The text with every recognised stand-in replaced by its Coptic letter.
    """
    if not greek_text:
        return ""
    pieces = []
    pos = 0
    total = len(greek_text)
    while pos < total:
        pair = greek_text[pos:pos + 2]
        if pair.lower() == 'ti':
            # Try the exact spelling first so "TI" yields the capital letter;
            # mixed-case spellings ("Ti", "tI") fall back to the lowercase one.
            lowercase_letter = GREEK_TO_COPTIC.get('ti', pair)
            pieces.append(GREEK_TO_COPTIC.get(pair, lowercase_letter))
            pos += 2
        else:
            ch = greek_text[pos]
            pieces.append(GREEK_TO_COPTIC.get(ch, ch))
            pos += 1
    return ''.join(pieces)
# Model loading with caching.
# Both slots start empty; each is filled with a (tokenizer, model) tuple the
# first time the matching load_* function runs.
coptic_to_english_model = english_to_coptic_model = None
# Prefer the GPU whenever torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
def load_coptic_to_english():
    """Return the cached Coptic→English ``(tokenizer, model)`` pair.

    On the first call the tokenizer and seq2seq model are loaded from the
    ``Norelad/coptic-megalaa-finetuned`` checkpoint, the model is moved to
    ``device``, and the pair is stored in the module-level cache; later calls
    return that same tuple.
    """
    global coptic_to_english_model
    if coptic_to_english_model is not None:
        return coptic_to_english_model
    checkpoint = "Norelad/coptic-megalaa-finetuned"
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint).to(device)
    coptic_to_english_model = (loaded_tokenizer, loaded_model)
    return coptic_to_english_model
def load_english_to_coptic():
    """Return the cached English→Coptic ``(tokenizer, model)`` pair.

    On the first call the tokenizer and seq2seq model are loaded from the
    ``megalaa/english-coptic-translator`` checkpoint, the model is moved to
    ``device``, and the pair is stored in the module-level cache; later calls
    return that same tuple.
    """
    global english_to_coptic_model
    if english_to_coptic_model is not None:
        return english_to_coptic_model
    checkpoint = "megalaa/english-coptic-translator"
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint).to(device)
    english_to_coptic_model = (loaded_tokenizer, loaded_model)
    return english_to_coptic_model
def translate_coptic_to_english(text, dialect='cop-sa'):
    """Translate Coptic to English.

    Args:
        text: Coptic input in Unicode. ``None``, empty, or whitespace-only
            input returns ``""`` immediately, without loading the model.
        dialect: ``'cop-sa'``, ``'cop-bo'`` or ``'cop'``; selects the
            one-character tag prepended to the model input. Any other value
            falls back to the Sahidic tag.

    Returns:
        The decoded English translation. If anything fails while loading the
        model or generating, the string ``"Translation error: <message>"`` is
        returned instead of raising, so the UI always has text to display.
    """
    if not text or not text.strip():
        # Nothing to translate; avoid sending a bare dialect tag to the model.
        return ""
    try:
        tokenizer, model = load_coptic_to_english()
        # Dialect tags
        # NOTE(review): the fine-tuning script removed in this commit fed the
        # model greekified text with no leading tag — confirm the checkpoint
        # really expects one.
        DIALECT_TAGS = {'cop-sa': 'з', 'cop-bo': 'б', 'cop': 'з'}
        dialect_tag = DIALECT_TAGS.get(dialect, 'з')
        # Preprocess: Convert to Greek transcription and add dialect tag
        greek_input = greekify(text.lower())
        greek_input = f"{dialect_tag} {greek_input}"
        # Generate translation. truncation=True clips over-long input to the
        # tokenizer's maximum length instead of letting generation fail on it.
        inputs = tokenizer(
            greek_input,
            return_tensors="pt",
            padding=True,
            truncation=True,
        ).to(device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            num_beams=5,
            early_stopping=True
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"Translation error: {e}"
def translate_english_to_coptic(text):
    """Translate English to Coptic.

    Args:
        text: English input. ``None``, empty, or whitespace-only input
            returns ``""`` immediately, without loading the model.

    Returns:
        The model output converted from Greek transcription to Coptic
        Unicode. If anything fails while loading the model or generating, the
        string ``"Translation error: <message>"`` is returned instead of
        raising, so the UI always has text to display.
    """
    if not text or not text.strip():
        # Nothing to translate; skip the model entirely.
        return ""
    try:
        tokenizer, model = load_english_to_coptic()
        # Generate translation. truncation=True clips over-long input to the
        # tokenizer's maximum length instead of letting generation fail on it.
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
        ).to(device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            num_beams=5,
            early_stopping=True
        )
        # Convert Greek output to Coptic
        greek_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return degreekify(greek_output)
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"Translation error: {e}"
# Example texts.
# Each Coptic sample is stored together with an English gloss as a
# (coptic, english) pair.
COPTIC_EXAMPLES = [
    (
        "ϯⲛⲁⲃⲱⲕ ⲉⲡⲏⲓ",
        "I will go to the house",
    ),
    (
        "ⲡⲉⲭⲣⲓⲥⲧⲟⲥ ⲡⲉ ⲡⲛⲟⲩⲧⲉ",
        "Christ is God",
    ),
    (
        "ⲁⲓⲛⲁⲩ ⲉⲡⲣⲱⲙⲉ",
        "I saw the man",
    ),
]
# Plain English sentences offered as sample input for the reverse direction.
ENGLISH_EXAMPLES = [
    "The Lord is good",
    "I am a teacher",
    "We give thanks to God",
]
# Gradio Interface
# Rows for the two "Examples" widgets, built up front. Only the Coptic half of
# each stored pair is offered, always with the Sahidic dialect preselected.
_coptic_example_rows = [[pair[0], "cop-sa"] for pair in COPTIC_EXAMPLES]
_english_example_rows = [[sentence] for sentence in ENGLISH_EXAMPLES]
# (label, value) pairs for the dialect selector.
_dialect_choices = [("Sahidic", "cop-sa"), ("Bohairic", "cop-bo")]

with gr.Blocks(theme=gr.themes.Soft(), title="Coptic Translation Interface") as demo:
    # Page header.
    gr.Markdown("""
    # 🔮 Coptic Translation Interface
    Translate between Coptic and English using fine-tuned MEGALAA models:
    - **Coptic → English**: `Norelad/coptic-megalaa-finetuned`
    - **English → Coptic**: `megalaa/english-coptic-translator`
    Based on 50,000+ parallel sentences from CopticScriptorium corpus.
    """)

    # Tab 1: Coptic input and dialect on the left, English output on the right.
    with gr.Tab("Coptic → English"):
        with gr.Row():
            with gr.Column():
                cop_input = gr.Textbox(
                    lines=5,
                    label="Coptic Text",
                    placeholder="Enter Coptic text (Unicode)...",
                )
                cop_dialect = gr.Radio(
                    label="Dialect",
                    choices=_dialect_choices,
                    value="cop-sa",
                )
                cop_translate_btn = gr.Button(
                    "Translate to English",
                    variant="primary",
                )
            with gr.Column():
                cop_output = gr.Textbox(
                    lines=5,
                    label="English Translation",
                    interactive=False,
                )
        gr.Examples(
            label="Example Coptic Texts",
            examples=_coptic_example_rows,
            fn=translate_coptic_to_english,
            inputs=[cop_input, cop_dialect],
            outputs=cop_output,
        )
        cop_translate_btn.click(
            translate_coptic_to_english,
            inputs=[cop_input, cop_dialect],
            outputs=cop_output,
        )

    # Tab 2: English input on the left, Coptic output on the right.
    with gr.Tab("English → Coptic"):
        with gr.Row():
            with gr.Column():
                eng_input = gr.Textbox(
                    lines=5,
                    label="English Text",
                    placeholder="Enter English text...",
                )
                eng_translate_btn = gr.Button(
                    "Translate to Coptic",
                    variant="primary",
                )
            with gr.Column():
                eng_output = gr.Textbox(
                    lines=5,
                    label="Coptic Translation",
                    interactive=False,
                )
        gr.Examples(
            label="Example English Texts",
            examples=_english_example_rows,
            fn=translate_english_to_coptic,
            inputs=eng_input,
            outputs=eng_output,
        )
        eng_translate_btn.click(
            translate_english_to_coptic,
            inputs=eng_input,
            outputs=eng_output,
        )

    # Page footer.
    gr.Markdown("""
    ---
    ### About
    This interface uses fine-tuned MarianMT models trained on the CopticScriptorium parallel corpus.
    The models support bidirectional translation between Sahidic/Bohairic Coptic and English.
    **Note**: For best results with Coptic input, use proper Unicode Coptic characters (U+2C80–U+2CFF).
    """)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()

hf_space_megalaa_training/app.py DELETED Viewed

@@ -1,470 +0,0 @@
-#!/usr/bin/env python3
-"""
-HuggingFace Space for fine-tuning megalaa Coptic translation model
-This Gradio app provides a user-friendly interface for training the
-megalaa/coptic-english-translator model on your CopticScriptorium corpus.
-"""
-import gradio as gr
-import os
-import subprocess
-import threading
-import time
-from pathlib import Path
-# Global variable to track training status
-training_status = {
-    "running": False,
-    "log": [],
-    "completed": False,
-    "error": None
-}
-def train_model(train_file, val_file, num_epochs, batch_size, learning_rate, hf_token, model_repo_name):
-    """
-    Start model training with uploaded data files
-    """
-    global training_status
-    # Reset status
-    training_status = {
-        "running": True,
-        "log": ["🚀 Starting training setup...\n"],
-        "completed": False,
-        "error": None
-    }
-    try:
-        # Save uploaded files
-        train_path = "train.jsonl"
-        val_path = "val.jsonl"
-        with open(train_path, "wb") as f:
-            f.write(train_file)
-        with open(val_path, "wb") as f:
-            f.write(val_file)
-        training_status["log"].append(f"✓ Training data saved: {train_path}\n")
-        training_status["log"].append(f"✓ Validation data saved: {val_path}\n")
-        # Create training script
-        script_content = f'''#!/usr/bin/env python3
-import os
-import json
-import torch
-from datasets import load_dataset
-from transformers import (
-    AutoTokenizer,
-    AutoModelForSeq2SeqLM,
-    Seq2SeqTrainingArguments,
-    Seq2SeqTrainer,
-    DataCollatorForSeq2Seq,
-)
-from huggingface_hub import HfApi, login
-from evaluate import load
-import numpy as np
-import logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# HuggingFace Hub configuration
-HF_TOKEN = "{hf_token}"
-MODEL_REPO_NAME = "{model_repo_name}"
-if HF_TOKEN:
-    login(token=HF_TOKEN)
-    logger.info("✓ Logged in to HuggingFace Hub")
-# Greekification for megalaa models
-COPTIC_TO_GREEK = {{
-    "ⲁ": "α", "ⲃ": "β", "ⲅ": "γ", "ⲇ": "δ", "ⲉ": "ε", "ⲋ": "ϛ",
-    "ⲍ": "ζ", "ⲏ": "η", "ⲑ": "θ", "ⲓ": "ι", "ⲕ": "κ", "ⲗ": "λ",
-    "ⲙ": "μ", "ⲛ": "ν", "ⲝ": "ξ", "ⲟ": "ο", "ⲡ": "π", "ⲣ": "ρ",
-    "ⲥ": "σ", "ⲧ": "τ", "ⲩ": "υ", "ⲫ": "φ", "ⲭ": "χ", "ⲯ": "ψ",
-    "ⲱ": "ω", "ϣ": "s", "ϥ": "f", "ϧ": "k", "ϩ": "h", "ϫ": "j",
-    "ϭ": "c", "ϯ": "t",
-}}
-def greekify(text):
-    if not text:
-        return ""
-    return "".join(COPTIC_TO_GREEK.get(c.lower(), c.lower()) for c in text)
-def extract_parallel_texts(examples):
-    coptic_texts = []
-    english_texts = []
-    for messages in examples['messages']:
-        coptic_text = None
-        english_text = None
-        for msg in messages:
-            if msg['role'] == 'user' and 'Coptic text to English:' in msg['content']:
-                coptic_text = msg['content'].split('Coptic text to English:')[-1].strip()
-            elif msg['role'] == 'assistant':
-                english_text = msg['content']
-        coptic_texts.append(coptic_text)
-        english_texts.append(english_text)
-    return {{'coptic': coptic_texts, 'english': english_texts}}
-def preprocess_function(examples, tokenizer, max_length=256):
-    greekified_coptic = [greekify(text.lower()) if text else "" for text in examples["coptic"]]
-    model_inputs = tokenizer(
-        greekified_coptic,
-        max_length=max_length,
-        truncation=True,
-        padding="max_length"
-    )
-    labels = tokenizer(
-        text_target=examples["english"],
-        max_length=max_length,
-        truncation=True,
-        padding="max_length"
-    )
-    labels["input_ids"] = [
-        [(label if label != tokenizer.pad_token_id else -100) for label in labels_example]
-        for labels_example in labels["input_ids"]
-    ]
-    model_inputs["labels"] = labels["input_ids"]
-    return model_inputs
-def compute_metrics(eval_preds, tokenizer, metric):
-    preds, labels = eval_preds
-    if isinstance(preds, tuple):
-        preds = preds[0]
-    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
-    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
-    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
-    decoded_labels = [[label] for label in decoded_labels]
-    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
-    return {{"bleu": result["score"]}}
-# Configuration
-model_name = "megalaa/coptic-english-translator"
-output_dir = "coptic_megalaa_finetuned"
-num_epochs = {num_epochs}
-batch_size = {batch_size}
-learning_rate = {learning_rate}
-logger.info("="*60)
-logger.info("MEGALAA FINE-TUNING ON HUGGINGFACE SPACES")
-logger.info("="*60)
-logger.info(f"Base model: {{model_name}}")
-logger.info(f"Epochs: {{num_epochs}}")
-logger.info(f"Batch size: {{batch_size}}")
-logger.info(f"Learning rate: {{learning_rate}}")
-# Check GPU
-if torch.cuda.is_available():
-    logger.info(f"GPU: {{torch.cuda.get_device_name(0)}}")
-    logger.info(f"GPU Memory: {{torch.cuda.get_device_properties(0).total_memory / (1024**3):.1f}} GB")
-else:
-    logger.warning("No GPU detected!")
-# Load model
-logger.info("\\nLoading model...")
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-# Load datasets
-logger.info("Loading datasets...")
-train_dataset = load_dataset('json', data_files='{train_path}', split='train')
-val_dataset = load_dataset('json', data_files='{val_path}', split='train')
-logger.info(f"Train samples: {{len(train_dataset):,}}")
-logger.info(f"Validation samples: {{len(val_dataset):,}}")
-# Extract and tokenize
-logger.info("Processing datasets...")
-train_dataset = train_dataset.map(extract_parallel_texts, batched=True, remove_columns=['messages'])
-val_dataset = val_dataset.map(extract_parallel_texts, batched=True, remove_columns=['messages'])
-tokenized_train = train_dataset.map(
-    lambda examples: preprocess_function(examples, tokenizer),
-    batched=True,
-    remove_columns=['coptic', 'english']
-)
-tokenized_val = val_dataset.map(
-    lambda examples: preprocess_function(examples, tokenizer),
-    batched=True,
-    remove_columns=['coptic', 'english']
-)
-# Setup training
-data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model, padding=True)
-metric = load("sacrebleu")
-training_args = Seq2SeqTrainingArguments(
-    output_dir=output_dir,
-    num_train_epochs=num_epochs,
-    per_device_train_batch_size=batch_size,
-    per_device_eval_batch_size=batch_size,
-    gradient_accumulation_steps=2,
-    learning_rate=learning_rate,
-    warmup_steps=500,
-    max_grad_norm=1.0,
-    weight_decay=0.01,
-    eval_strategy="steps",
-    eval_steps=500,
-    logging_steps=50,
-    save_steps=500,
-    save_total_limit=3,
-    load_best_model_at_end=True,
-    metric_for_best_model="bleu",
-    greater_is_better=True,
-    predict_with_generate=True,
-    generation_max_length=256,
-    generation_num_beams=5,
-    fp16=torch.cuda.is_available(),
-    report_to="tensorboard",
-    logging_dir=f"{{output_dir}}/logs",
-    push_to_hub=False,
-)
-trainer = Seq2SeqTrainer(
-    model=model,
-    args=training_args,
-    train_dataset=tokenized_train,
-    eval_dataset=tokenized_val,
-    tokenizer=tokenizer,
-    data_collator=data_collator,
-    compute_metrics=lambda eval_preds: compute_metrics(eval_preds, tokenizer, metric)
-)
-logger.info("\\nSTARTING TRAINING")
-logger.info("="*60)
-# Train
-trainer.train()
-# Save locally
-logger.info("\\nSaving final model...")
-trainer.save_model(f"{{output_dir}}/final")
-tokenizer.save_pretrained(f"{{output_dir}}/final")
-# Push to HuggingFace Hub
-if HF_TOKEN and MODEL_REPO_NAME:
-    logger.info(f"\\nPushing model to HuggingFace Hub: {{MODEL_REPO_NAME}}")
-    try:
-        api = HfApi()
-        api.create_repo(repo_id=MODEL_REPO_NAME, repo_type="model", exist_ok=True)
-        # Upload all files
-        api.upload_folder(
-            folder_path=f"{{output_dir}}/final",
-            repo_id=MODEL_REPO_NAME,
-            repo_type="model",
-        )
-        logger.info(f"✅ Model successfully pushed to: https://huggingface.co/{{MODEL_REPO_NAME}}")
-    except Exception as e:
-        logger.error(f"❌ Failed to push to Hub: {{e}}")
-# Final evaluation
-logger.info("\\nFinal evaluation...")
-eval_results = trainer.evaluate()
-logger.info("\\n" + "="*60)
-logger.info("TRAINING COMPLETE!")
-logger.info("="*60)
-for key, value in eval_results.items():
-    logger.info(f"{{key}}: {{value}}")
-logger.info(f"\\n✅ Model saved locally to: {{output_dir}}/final")
-if HF_TOKEN and MODEL_REPO_NAME:
-    logger.info(f"✅ Model available at: https://huggingface.co/{{MODEL_REPO_NAME}}")
-'''
-        with open("train_script.py", "w") as f:
-            f.write(script_content)
-        training_status["log"].append("✓ Training script created\n")
-        training_status["log"].append("🚀 Starting training...\n\n")
-        # Run training in subprocess
-        process = subprocess.Popen(
-            ["python", "train_script.py"],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-            bufsize=1
-        )
-        # Stream output
-        for line in process.stdout:
-            training_status["log"].append(line)
-            time.sleep(0.01)  # Small delay to allow UI updates
-        process.wait()
-        if process.returncode == 0:
-            training_status["completed"] = True
-            training_status["log"].append("\n\n✅ TRAINING COMPLETED SUCCESSFULLY!\n")
-            training_status["log"].append("📦 Model saved locally to: coptic_megalaa_finetuned/final\n")
-            if hf_token and model_repo_name:
-                training_status["log"].append(f"📦 Model pushed to: https://huggingface.co/{model_repo_name}\n")
-        else:
-            training_status["error"] = f"Training failed with exit code {process.returncode}"
-            training_status["log"].append(f"\n\n❌ Training failed with exit code {process.returncode}\n")
-    except Exception as e:
-        training_status["error"] = str(e)
-        training_status["log"].append(f"\n\n❌ Error: {str(e)}\n")
-    finally:
-        training_status["running"] = False
-def start_training(train_file, val_file, num_epochs, batch_size, learning_rate, hf_token, model_repo_name):
-    """
-    Start training in background thread
-    """
-    if training_status["running"]:
-        return "⚠️ Training already in progress!"
-    if not hf_token or not model_repo_name:
-        return "⚠️ Please provide both HuggingFace Token and Model Repository Name!"
-    # Start training thread
-    thread = threading.Thread(
-        target=train_model,
-        args=(train_file, val_file, num_epochs, batch_size, learning_rate, hf_token, model_repo_name)
-    )
-    thread.daemon = True
-    thread.start()
-    return "🚀 Training started! Monitor progress in the logs below."
-def get_training_log():
-    """
-    Return current training log
-    """
-    return "".join(training_status["log"])
-def check_status():
-    """
-    Return training status
-    """
-    if training_status["completed"]:
-        return "✅ Training completed!"
-    elif training_status["error"]:
-        return f"❌ Error: {training_status['error']}"
-    elif training_status["running"]:
-        return "🔄 Training in progress..."
-    else:
-        return "⏸️ Ready to train"
-# Create Gradio interface
-with gr.Blocks(title="Megalaa Coptic Fine-tuning") as demo:
-    gr.Markdown("""
-    # 🏛️ Megalaa Coptic Translation Fine-tuning
-    Fine-tune the megalaa/coptic-english-translator model on your CopticScriptorium corpus.
-    **⚙️ IMPORTANT:** Make sure this Space is running on **T4 Small GPU** for optimal performance!
-    """)
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### 🔑 HuggingFace Hub Configuration")
-            hf_token_input = gr.Textbox(
-                label="HuggingFace Token",
-                placeholder="hf_...",
-                type="password",
-                info="Get your token from https://huggingface.co/settings/tokens"
-            )
-            model_repo_input = gr.Textbox(
-                label="Model Repository Name",
-                placeholder="username/coptic-megalaa-finetuned",
-                info="Example: john-doe/coptic-megalaa-finetuned"
-            )
-            gr.Markdown("### 📤 Upload Training Data")
-            train_file_upload = gr.File(
-                label="Training Data (train.jsonl)",
-                file_types=[".jsonl"]
-            )
-            val_file_upload = gr.File(
-                label="Validation Data (val.jsonl)",
-                file_types=[".jsonl"]
-            )
-            gr.Markdown("### ⚙️ Training Parameters")
-            num_epochs = gr.Slider(
-                minimum=1,
-                maximum=10,
-                value=5,
-                step=1,
-                label="Number of Epochs"
-            )
-            batch_size = gr.Slider(
-                minimum=4,
-                maximum=16,
-                value=8,
-                step=4,
-                label="Batch Size"
-            )
-            learning_rate = gr.Number(
-                value=2e-5,
-                label="Learning Rate"
-            )
-            start_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
-            status_text = gr.Textbox(label="Status", value="⏸️ Ready to train")
-        with gr.Column():
-            gr.Markdown("### 📊 Training Log")
-            log_output = gr.Textbox(
-                label="Real-time Training Log",
-                lines=30,
-                max_lines=30,
-                autoscroll=True,
-                every=2
-            )
-    # Button actions
-    start_btn.click(
-        fn=start_training,
-        inputs=[train_file_upload, val_file_upload, num_epochs, batch_size, learning_rate, hf_token_input, model_repo_input],
-        outputs=status_text
-    )
-    # Auto-refresh log and status
-    demo.load(fn=get_training_log, outputs=log_output, every=2)
-    demo.load(fn=check_status, outputs=status_text, every=2)
-    gr.Markdown("""
-    ---
-    ### 📥 After Training
-    When training completes, your fine-tuned model will be automatically pushed to HuggingFace Hub!
-    **Next steps:**
-    1. Visit your model repository at `https://huggingface.co/YOUR_USERNAME/MODEL_NAME`
-    2. Download and test with: `python evaluate_megalaa_model.py`
-    3. Integrate into your Coptic translation interface
-    4. Share your model with the community!
-    **Estimated training time:** 6-8 hours on T4 GPU
-    **Note:** The model is also saved temporarily to `coptic_megalaa_finetuned/final/` during training,
-    but this local copy will be lost when the Space restarts. Use the HuggingFace Hub version!
-    """)
-if __name__ == "__main__":
-    demo.launch()

hf_space_megalaa_training/requirements.txt DELETED Viewed

@@ -1,11 +0,0 @@
-torch>=2.0.0
-transformers>=4.35.0
-datasets>=2.14.0
-accelerate>=0.24.0
-evaluate>=0.4.1
-sacrebleu>=2.3.1
-sentencepiece>=0.1.99
-protobuf>=3.20.0
-gradio>=4.44.0
-tensorboard>=2.15.0
-huggingface_hub>=0.20.0

requirements.txt CHANGED Viewed

@@ -1,10 +1,6 @@
-streamlit
-huggingface_hub
-lxml
-stanza
 torch>=2.0.0
 transformers>=4.35.0
 sentencepiece>=0.1.99
 protobuf>=3.20.0
 accelerate>=0.20.0
-pyswip>=0.2.10

+gradio
 torch>=2.0.0
 transformers>=4.35.0
 sentencepiece>=0.1.99
 protobuf>=3.20.0
 accelerate>=0.20.0