Spaces:

minte-two
/

GihonTech_Translation

Sleeping

App Files Files Community

Minte committed on Oct 9, 2025

Commit

2f77ad3

1 Parent(s): 5eebcbf

Add initial implementation of translation models and Gradio interface

Browse files

Files changed (2) hide show

app.py +308 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,308 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoProcessor
+# Language configuration with proper model handling
+LANGUAGE_CONFIG = {
+    "Amharic": {
+        "code": "amh",
+        "model_type": "seamless",
+        "seamless_code": "amh"
+    },
+    "Swahili": {
+        "code": "swh",
+        "model_type": "seamless",
+        "seamless_code": "swh"
+    },
+    "Somali": {
+        "code": "som",
+        "model_type": "seamless",
+        "seamless_code": "som"
+    },
+    "Afan Oromo": {
+        "code": "gaz",
+        "model_type": "nllb",
+        "nllb_code": "gaz_Latn"
+    },
+    "Tigrinya": {
+        "code": "tir",
+        "model_type": "nllb",
+        "nllb_code": "tir_Ethi"
+    },
+    "Chichewa": {
+        "code": "nya",
+        "model_type": "nllb",
+        "nllb_code": "nya_Latn"
+    }
+}
+# Model instances
+models = {}
+tokenizers = {}
+processors = {}
+print("🚀 Initializing translation models...")
+# Load SeamlessM4T model for Amharic, Swahili, Somali
+try:
+    print("📥 Loading SeamlessM4T model...")
+    seamless_model_id = "facebook/seamless-m4t-v2-large"
+    processors['seamless'] = AutoProcessor.from_pretrained(seamless_model_id)
+    models['seamless'] = AutoModelForSeq2SeqLM.from_pretrained(seamless_model_id)
+    print("✅ SeamlessM4T model loaded successfully!")
+except Exception as e:
+    print(f"❌ Failed to load SeamlessM4T model: {e}")
+    models['seamless'] = None
+    processors['seamless'] = None
+# Load NLLB model for other languages
+try:
+    print("📥 Loading NLLB model...")
+    nllb_model_id = "facebook/nllb-200-distilled-600M"
+    tokenizers['nllb'] = AutoTokenizer.from_pretrained(nllb_model_id)
+    models['nllb'] = AutoModelForSeq2SeqLM.from_pretrained(nllb_model_id)
+    print("✅ NLLB model loaded successfully!")
+except Exception as e:
+    print(f"❌ Failed to load NLLB model: {e}")
+    models['nllb'] = None
+    tokenizers['nllb'] = None
+def translate_with_seamless(text, source_lang_code):
+    """Translate text using SeamlessM4T model"""
+    try:
+        if models['seamless'] is None or processors['seamless'] is None:
+            return "SeamlessM4T model not available"
+        # Preprocess text
+        inputs = processors['seamless'](text=text, src_lang=source_lang_code, return_tensors="pt")
+        # Get BOS token for target language (English)
+        forced_bos_token_id = processors['seamless'].tokenizer.convert_tokens_to_ids("<|eng|>")
+        # Generate translation
+        with torch.no_grad():
+            generated_tokens = models['seamless'].generate(
+                **inputs,
+                forced_bos_token_id=forced_bos_token_id,
+                max_length=256
+            )
+        # Decode and return
+        translation = processors['seamless'].batch_decode(generated_tokens, skip_special_tokens=True)[0]
+        return translation
+    except Exception as e:
+        print(f"SeamlessM4T translation error: {e}")
+        return f"Translation failed: {str(e)[:200]}"
+def translate_with_nllb(text, source_lang_code):
+    """Translate text using NLLB model"""
+    try:
+        if models['nllb'] is None or tokenizers['nllb'] is None:
+            return "NLLB model not available"
+        # Tokenize input
+        inputs = tokenizers['nllb'](text, return_tensors="pt")
+        # Define target language (English)
+        forced_bos_token_id = tokenizers['nllb'].convert_tokens_to_ids("eng_Latn")
+        # Generate translation using beam search for better quality
+        with torch.no_grad():
+            generated_tokens = models['nllb'].generate(
+                **inputs,
+                forced_bos_token_id=forced_bos_token_id,
+                max_length=256,
+                num_beams=5,
+                early_stopping=True
+            )
+        # Decode
+        translation = tokenizers['nllb'].batch_decode(generated_tokens, skip_special_tokens=True)[0]
+        return translation
+    except Exception as e:
+        print(f"NLLB translation error: {e}")
+        return f"Translation failed: {str(e)[:200]}"
+def translate_text(text, source_language):
+    """Main translation function"""
+    if not text.strip():
+        return "Please enter text to translate"
+    if source_language not in LANGUAGE_CONFIG:
+        return f"Translation for {source_language} is not supported"
+    config = LANGUAGE_CONFIG[source_language]
+    try:
+        if config["model_type"] == "seamless":
+            return translate_with_seamless(text, config["seamless_code"])
+        else:  # nllb
+            return translate_with_nllb(text, config["nllb_code"])
+    except Exception as e:
+        print(f"Translation error for {source_language}: {e}")
+        return f"Translation failed: {str(e)[:200]}"
+# Example texts for each language
+EXAMPLE_TEXTS = {
+    "Amharic": "ሁሉም ሰው በሁሉም መብቶ��� እኩል ነው።",
+    "Swahili": "Habari za asubuhi, leo tunajifunza teknolojia ya usemi.",
+    "Somali": "Maanta waa maalin qurux badan oo qoraxdu si wanaagsan u iftiimayso.",
+    "Afan Oromo": "Akkam bulte, har'a technology dubbachuu baranna.",
+    "Tigrinya": "መዓልቲ ሰናይ፡ ሎሚ ቴክኖሎጂ ዘረባ ንፈልጥ።",
+    "Chichewa": "Alipo wina aliyense ali ndi ufulu wachibadwidwe."
+}
+# Test the models on startup
+def test_models():
+    print("🧪 Testing translation models...")
+    test_cases = [
+        ("Swahili", "Habari za asubuhi"),
+        ("Somali", "Maanta waa maalin fiican"),
+        ("Amharic", "ሰላም"),
+        ("Afan Oromo", "Akkam jirta"),
+        ("Tigrinya", "ሰላም"),
+        ("Chichewa", "Moni")
+    ]
+    for lang, text in test_cases:
+        try:
+            result = translate_text(text, lang)
+            print(f"✅ {lang} test: '{text}' → '{result}'")
+        except Exception as e:
+            print(f"❌ {lang} test failed: {e}")
+# Run tests on startup
+test_models()
+# Create Gradio interface
+with gr.Blocks(
+    theme=gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="green"
+    ),
+    title="🌍 GihonTech - Local Language to English Translation"
+) as demo:
+    gr.Markdown("# 🌍 GihonTech Local Language to English Translation")
+    gr.Markdown("Translate text from African languages to English using advanced AI models")
+    with gr.Row():
+        with gr.Column(scale=1):
+            text_input = gr.Textbox(
+                label="Source Text",
+                placeholder="Enter text to translate...",
+                lines=4,
+                show_copy_button=True
+            )
+            language_select = gr.Dropdown(
+                choices=list(LANGUAGE_CONFIG.keys()),
+                value="Amharic",
+                label="Source Language",
+                info="Select the language of your text"
+            )
+            # Example buttons in two rows
+            with gr.Row():
+                for lang in ["Amharic", "Swahili", "Somali"]:
+                    gr.Button(
+                        f"{lang} Example",
+                        size="sm"
+                    ).click(
+                        lambda l=lang: EXAMPLE_TEXTS[l],
+                        outputs=text_input
+                    )
+            with gr.Row():
+                for lang in ["Afan Oromo", "Tigrinya", "Chichewa"]:
+                    gr.Button(
+                        f"{lang} Example",
+                        size="sm"
+                    ).click(
+                        lambda l=lang: EXAMPLE_TEXTS[l],
+                        outputs=text_input
+                    )
+            translate_btn = gr.Button(
+                "🎯 Translate to English",
+                variant="primary",
+                size="lg"
+            )
+        with gr.Column(scale=1):
+            translation_output = gr.Textbox(
+                label="English Translation",
+                placeholder="Your translated text will appear here...",
+                lines=5,
+                show_copy_button=True
+            )
+    # Connect the translate button
+    translate_btn.click(
+        fn=translate_text,
+        inputs=[text_input, language_select],
+        outputs=translation_output
+    )
+    # Also allow pressing Enter to translate
+    text_input.submit(
+        fn=translate_text,
+        inputs=[text_input, language_select],
+        outputs=translation_output
+    )
+    # Model status and information
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 🔧 Model Information")
+            # Create status display
+            seamless_status = "✅ Loaded" if models.get('seamless') else "❌ Failed"
+            nllb_status = "✅ Loaded" if models.get('nllb') else "❌ Failed"
+            status_text = f"SeamlessM4T: {seamless_status} | NLLB: {nllb_status}"
+            gr.Textbox(
+                value=status_text,
+                label="Model Status",
+                interactive=False
+            )
+            # Create model info
+            seamless_langs = [lang for lang, config in LANGUAGE_CONFIG.items() if config["model_type"] == "seamless"]
+            nllb_langs = [lang for lang, config in LANGUAGE_CONFIG.items() if config["model_type"] == "nllb"]
+            gr.Markdown(f"""
+            **Advanced Models (SeamlessM4T):** {', '.join(seamless_langs)}
+            **Standard Models (NLLB-200):** {', '.join(nllb_langs)}
+            **Features:**
+            - High-quality translations for African languages
+            - Support for text input and copy-paste functionality
+            - Fast and accurate results using beam search
+            - Proper tokenization for each language family
+            """)
+    # Add CSS for better styling
+    gr.HTML("""
+    <style>
+    .gradio-container {
+        max-width: 1200px !important;
+    }
+    .textbox textarea {
+        min-height: 120px;
+    }
+    </style>
+    """)
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+# Minimal requirements.txt
+torch>=2.0.1
+transformers>=4.36.0  # SeamlessM4T v2 (facebook/seamless-m4t-v2-large) needs 4.36.0 or newer
+gradio>=4.0.0
+soundfile>=0.12.0
+resampy>=0.4.0
+numpy>=1.24.0