#!/usr/bin/env python3 """ Kabyle Translation Hub - Hugging Face Spaces Edition Simple translation interface: MarianMT vs LibreTranslate Users choose their preferred translation - no metrics, no scores. """ import warnings warnings.filterwarnings("ignore", category=FutureWarning) import os import requests import torch from flask import Flask, request, render_template_string, jsonify from transformers import AutoTokenizer, AutoModelForSeq2SeqLM from concurrent.futures import ThreadPoolExecutor, as_completed # Configuration LIBRETRANSLATE_URL = os.environ.get("LIBRETRANSLATE_URL", "https://imsidag-community-libretranslate-kabyle.hf.space/translate") MODEL_ID = "boffire/marianmt-en-kab" # LibreTranslate Kabyle variants KABYLE_VARIANTS = { "Taqbaylit (Standard)": "kab", "Taqbaylit (Latest)": "kab_kab", "Taqbaylit (Tasenselkimt)": "kab_comp", "Taqbaylit (51000)": "kab_comp2", "Taqbaylit (OS)": "kab_os", "Taqbaylit (Num)": "kab_num", } # Global variables for model caching model = None tokenizer = None device = None def load_model(): """Load MarianMT model once and cache it""" global model, tokenizer, device if model is None: print("Loading MarianMT model...") device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False) model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID).to(device).eval() print(f"Model loaded successfully on {device}") return model, tokenizer, device def translate_marian(text): """Translate using MarianMT with multiple alternatives""" if not text or not text.strip(): return ["Please enter text to translate"] try: model, tokenizer, device = load_model() # Prepare inputs inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512) inputs = {k: v.to(device) for k, v in inputs.items()} with torch.no_grad(): # Simple beam search without group beam search outputs = model.generate( **inputs, num_beams=4, num_return_sequences=3, max_length=128, early_stopping=True, do_sample=False, ) translations = [] for output in outputs: trans = tokenizer.decode(output, skip_special_tokens=True) if trans and trans not in translations: translations.append(trans) return translations if translations else ["[Error: No translation generated]"] except Exception as e: print(f"MarianMT translation error: {e}") import traceback traceback.print_exc() return [f"[Error: {str(e)}]"] def translate_libre_variant(text, variant_code): """Translate using a specific LibreTranslate variant""" try: r = requests.post( LIBRETRANSLATE_URL, headers={"Content-Type": "application/json"}, json={"q": text, "source": "en", "target": variant_code}, timeout=10 ) r.raise_for_status() result = r.json().get("translatedText", "[Error: No translation]") return {"success": True, "text": result} except Exception as e: return {"success": False, "text": f"[Error: {str(e)[:50]}]"} def translate_libre_all_variants(text): """Translate using all LibreTranslate variants in parallel""" results = {} with ThreadPoolExecutor(max_workers=4) as executor: future_to_name = { executor.submit(translate_libre_variant, text, code): name for name, code in KABYLE_VARIANTS.items() } for future in as_completed(future_to_name, timeout=15): name = future_to_name[future] try: results[name] = future.result() except Exception as e: results[name] = {"success": False, "text": f"[Error: {e}]"} return results HTML_TEMPLATE = """
Choose the translation that suits you best
Enter text above and click Translate to see results