boffire's picture
Update app.py
3252379 verified
#!/usr/bin/env python3
"""
Kabyle Translation Hub - Hugging Face Spaces Edition
Simple translation interface: MarianMT vs LibreTranslate
Users choose their preferred translation - no metrics, no scores.
"""
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
import os
import requests
import torch
from flask import Flask, request, render_template_string, jsonify
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from concurrent.futures import ThreadPoolExecutor, as_completed
# --- Configuration ---
# Translation endpoint of the LibreTranslate instance; the LIBRETRANSLATE_URL
# environment variable overrides the default.
LIBRETRANSLATE_URL = os.getenv(
    "LIBRETRANSLATE_URL",
    "https://imsidag-community-libretranslate-kabyle.hf.space/translate",
)
# Identifier handed to the transformers ``from_pretrained`` loaders.
MODEL_ID = "boffire/marianmt-en-kab"

# Display label -> LibreTranslate target-language code.
KABYLE_VARIANTS = {
    "Taqbaylit (Standard)": "kab",
    "Taqbaylit (Latest)": "kab_kab",
    "Taqbaylit (Tasenselkimt)": "kab_comp",
    "Taqbaylit (51000)": "kab_comp2",
    "Taqbaylit (OS)": "kab_os",
    "Taqbaylit (Num)": "kab_num",
}

# Module-level cache for the MarianMT objects; all three stay None until
# load_model() populates them on first use.
model = tokenizer = device = None
def load_model():
    """Return the cached ``(model, tokenizer, device)`` triple, loading on first use.

    The three objects live in module-level globals. While ``model`` is still
    ``None`` the tokenizer and model are fetched with ``from_pretrained`` for
    ``MODEL_ID``, the model is moved to CUDA when available (CPU otherwise)
    and switched to eval mode. Later calls return the cached objects as-is.
    """
    global model, tokenizer, device

    # Fast path: an earlier call already populated the cache.
    if model is not None:
        return model, tokenizer, device

    print("Loading MarianMT model...")
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(backend)
    # use_fast=False requests the slow (non-"fast") tokenizer implementation.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
    # ``model`` is assigned last so the cache check above only succeeds once
    # the device and tokenizer globals are set as well.
    model = seq2seq.to(device).eval()
    print(f"Model loaded successfully on {device}")
    return model, tokenizer, device
def translate_marian(text):
    """Return distinct MarianMT translations of ``text`` as a list of strings.

    Blank or missing input yields a single prompt message instead of a
    translation. Beam search (4 beams) requests 3 sequences; empty and
    duplicate decodings are dropped while keeping the generation order.
    Any exception is printed to stdout together with its traceback and
    reported as a one-element list holding an ``[Error: ...]`` string, so
    the caller always receives a non-empty list.
    """
    if not (text and text.strip()):
        return ["Please enter text to translate"]

    try:
        mt_model, mt_tokenizer, mt_device = load_model()

        # Tokenize, then move every tensor onto the model's device.
        encoded = mt_tokenizer(
            text,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=512,
        )
        batch = {key: tensor.to(mt_device) for key, tensor in encoded.items()}

        # Plain beam search (no sampling, no group beam search).
        generation_args = {
            "num_beams": 4,
            "num_return_sequences": 3,
            "max_length": 128,
            "early_stopping": True,
            "do_sample": False,
        }
        with torch.no_grad():
            generated = mt_model.generate(**batch, **generation_args)

        decoded = [
            mt_tokenizer.decode(sequence, skip_special_tokens=True)
            for sequence in generated
        ]
        # dict.fromkeys keeps first-seen order while removing duplicates.
        unique = list(dict.fromkeys(candidate for candidate in decoded if candidate))
        if unique:
            return unique
        return ["[Error: No translation generated]"]
    except Exception as e:
        print(f"MarianMT translation error: {e}")
        import traceback
        traceback.print_exc()
        return [f"[Error: {str(e)}]"]
def translate_libre_variant(text, variant_code):
    """Translate English ``text`` with one LibreTranslate target variant.

    Args:
        text: English source text sent as the ``q`` field.
        variant_code: LibreTranslate target-language code sent as ``target``.

    Returns:
        A dict with ``"success"`` (bool) and ``"text"`` (str). On success
        ``"text"`` is the service's ``translatedText``. On any failure
        (network error, HTTP error status, undecodable body, or a body
        without ``translatedText``) ``"success"`` is ``False`` and
        ``"text"`` holds a short ``[Error: ...]`` marker. This function
        does not raise.
    """
    try:
        response = requests.post(
            LIBRETRANSLATE_URL,
            headers={"Content-Type": "application/json"},
            json={"q": text, "source": "en", "target": variant_code},
            timeout=10,
        )
        response.raise_for_status()
        payload = response.json()
    except Exception as e:
        # Best-effort boundary: each variant reports its own failure instead
        # of aborting the whole comparison. The message is truncated to keep
        # the UI cell readable.
        return {"success": False, "text": f"[Error: {str(e)[:50]}]"}

    translated = payload.get("translatedText") if isinstance(payload, dict) else None
    if translated is None:
        # A response without a translation is a failure; it must not be
        # presented to the user as a selectable translation.
        return {"success": False, "text": "[Error: No translation]"}
    return {"success": True, "text": translated}
def translate_libre_all_variants(text):
    """Translate ``text`` with every LibreTranslate variant in parallel.

    Each entry of ``KABYLE_VARIANTS`` is submitted to a thread pool. The
    call waits up to 15 seconds for the results; variants that have not
    finished by then are reported as failed instead of raising, so one slow
    backend cannot turn the whole request into an error.

    Args:
        text: English source text.

    Returns:
        A dict mapping every variant display name to a result dict with
        ``"success"`` and ``"text"``, ordered like ``KABYLE_VARIANTS`` so
        the UI layout does not depend on completion order.
    """
    # Imported under an alias: before Python 3.11 this is a different class
    # from the built-in TimeoutError.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    def _outcome(future):
        # Turn a finished future into a result dict without raising.
        try:
            return future.result()
        except Exception as e:
            return {"success": False, "text": f"[Error: {e}]"}

    results = {}
    with ThreadPoolExecutor(max_workers=4) as executor:
        future_to_name = {
            executor.submit(translate_libre_variant, text, code): name
            for name, code in KABYLE_VARIANTS.items()
        }
        try:
            for future in as_completed(future_to_name, timeout=15):
                results[future_to_name[future]] = _outcome(future)
        except FuturesTimeoutError:
            # as_completed raises from the iterator itself once the deadline
            # passes; fill in whatever is still missing.
            for future, name in future_to_name.items():
                if name in results:
                    continue
                # cancel() succeeds only for work that has not started yet;
                # work already running is left to finish and is awaited when
                # the executor context exits.
                if future.cancel() or not future.done():
                    results[name] = {
                        "success": False,
                        "text": "[Error: Request timed out]",
                    }
                else:
                    results[name] = _outcome(future)
    return {name: results[name] for name in KABYLE_VARIANTS}
# Jinja template for the single page of the app (rendered by index()).
# Context variables read by the template: ``source_text`` (str), ``marian``
# (list of str) and ``libre`` (dict: variant name -> dict with ``success``,
# ``text`` and ``code``).
# NOTE: the inline event handlers that embed ``| tojson`` output use
# single-quoted attributes on purpose. ``tojson`` emits unescaped double
# quotes, which would terminate a double-quoted attribute and break the
# handler; it escapes single quotes, so single-quoted attributes are safe.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Kabyle Translation Hub</title>
<style>
:root {
--bg-primary: #0f172a;
--bg-secondary: #1e293b;
--bg-tertiary: #334155;
--accent-primary: #6366f1;
--accent-secondary: #8b5cf6;
--accent-success: #10b981;
--text-primary: #f1f5f9;
--text-secondary: #94a3b8;
--border: #475569;
--radius: 12px;
--radius-sm: 8px;
}
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: system-ui, -apple-system, sans-serif;
background: linear-gradient(135deg, var(--bg-primary) 0%, #1a1a2e 100%);
color: var(--text-primary);
min-height: 100vh;
padding: 20px;
}
.container {
max-width: 1000px;
margin: 0 auto;
}
header {
text-align: center;
padding: 40px 20px;
}
header h1 {
font-size: 2.5rem;
font-weight: 800;
background: linear-gradient(135deg, var(--accent-primary), var(--accent-secondary));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin-bottom: 10px;
}
header p {
color: var(--text-secondary);
font-size: 1.1rem;
}
.input-card {
background: var(--bg-secondary);
border-radius: var(--radius);
padding: 24px;
margin-bottom: 24px;
border: 1px solid var(--border);
}
.input-group {
display: flex;
flex-direction: column;
gap: 16px;
}
textarea {
width: 100%;
padding: 16px;
background: var(--bg-tertiary);
border: 2px solid var(--border);
border-radius: var(--radius-sm);
color: var(--text-primary);
font-size: 16px;
resize: vertical;
min-height: 120px;
font-family: inherit;
transition: border-color 0.2s;
}
textarea:focus {
outline: none;
border-color: var(--accent-primary);
}
textarea::placeholder {
color: var(--text-secondary);
}
.btn-primary {
background: linear-gradient(135deg, var(--accent-primary), var(--accent-secondary));
color: white;
border: none;
padding: 16px 32px;
border-radius: var(--radius-sm);
font-size: 16px;
font-weight: 600;
cursor: pointer;
transition: transform 0.2s, box-shadow 0.2s;
display: flex;
align-items: center;
justify-content: center;
gap: 8px;
}
.btn-primary:hover {
transform: translateY(-2px);
box-shadow: 0 10px 30px rgba(99, 102, 241, 0.3);
}
.btn-primary:disabled {
opacity: 0.6;
cursor: not-allowed;
transform: none;
}
.results-container {
display: flex;
flex-direction: column;
gap: 20px;
}
.engine-card {
background: var(--bg-secondary);
border-radius: var(--radius);
border: 1px solid var(--border);
overflow: hidden;
}
.engine-header {
padding: 20px 24px;
background: var(--bg-tertiary);
border-bottom: 1px solid var(--border);
display: flex;
align-items: center;
justify-content: space-between;
}
.engine-title {
display: flex;
align-items: center;
gap: 12px;
font-size: 1.1rem;
font-weight: 600;
}
.engine-badge {
font-size: 0.75rem;
padding: 4px 12px;
background: var(--bg-secondary);
border-radius: 20px;
color: var(--text-secondary);
border: 1px solid var(--border);
}
.translation-list {
list-style: none;
}
.translation-item {
padding: 20px 24px;
border-bottom: 1px solid var(--border);
cursor: pointer;
transition: all 0.2s;
display: flex;
align-items: center;
justify-content: space-between;
gap: 16px;
}
.translation-item:last-child {
border-bottom: none;
}
.translation-item:hover {
background: rgba(99, 102, 241, 0.1);
}
.translation-item.selected {
background: rgba(16, 185, 129, 0.15);
border-left: 4px solid var(--accent-success);
}
.translation-text {
flex: 1;
font-size: 1.05rem;
line-height: 1.6;
color: var(--text-primary);
}
.copy-icon {
opacity: 0;
color: var(--accent-primary);
transition: opacity 0.2s;
flex-shrink: 0;
}
.translation-item:hover .copy-icon {
opacity: 1;
}
.variant-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 1px;
background: var(--border);
}
.variant-item {
background: var(--bg-secondary);
padding: 16px 20px;
cursor: pointer;
transition: all 0.2s;
display: flex;
flex-direction: column;
gap: 8px;
position: relative;
}
.variant-item:hover {
background: rgba(99, 102, 241, 0.1);
}
.variant-item.selected {
background: rgba(16, 185, 129, 0.15);
box-shadow: inset 4px 0 0 var(--accent-success);
}
.variant-header {
display: flex;
align-items: center;
justify-content: space-between;
}
.variant-name {
font-size: 0.75rem;
color: var(--text-secondary);
text-transform: uppercase;
letter-spacing: 0.5px;
font-weight: 600;
}
.variant-code {
font-size: 0.7rem;
color: var(--text-secondary);
font-family: monospace;
background: var(--bg-tertiary);
padding: 2px 8px;
border-radius: 4px;
}
.variant-text {
font-size: 1rem;
color: var(--text-primary);
line-height: 1.5;
}
.variant-item.error {
opacity: 0.6;
}
.variant-item.error .variant-text {
color: #ef4444;
font-family: monospace;
font-size: 0.85rem;
}
.source-display {
background: var(--bg-tertiary);
padding: 20px 24px;
border-radius: var(--radius);
margin-bottom: 20px;
border: 1px solid var(--border);
}
.source-label {
font-size: 0.75rem;
color: var(--text-secondary);
text-transform: uppercase;
letter-spacing: 0.5px;
margin-bottom: 8px;
}
.source-text {
font-size: 1.1rem;
color: var(--text-primary);
line-height: 1.6;
}
.check-icon {
color: var(--accent-success);
opacity: 0;
transition: opacity 0.2s;
}
.translation-item.selected .check-icon,
.variant-item.selected .check-icon {
opacity: 1;
}
.toast {
position: fixed;
bottom: 24px;
left: 50%;
transform: translateX(-50%) translateY(100px);
background: var(--accent-success);
color: white;
padding: 12px 24px;
border-radius: 100px;
font-weight: 600;
opacity: 0;
transition: all 0.3s ease;
z-index: 1000;
box-shadow: 0 10px 30px rgba(0,0,0,0.3);
}
.toast.show {
transform: translateX(-50%) translateY(0);
opacity: 1;
}
.loading {
display: inline-block;
width: 18px;
height: 18px;
border: 2px solid rgba(255,255,255,0.3);
border-radius: 50%;
border-top-color: white;
animation: spin 0.8s linear infinite;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.empty-state {
text-align: center;
padding: 60px 20px;
color: var(--text-secondary);
}
.empty-state svg {
width: 64px;
height: 64px;
margin-bottom: 16px;
opacity: 0.5;
}
@media (max-width: 640px) {
header h1 {
font-size: 1.75rem;
}
.variant-grid {
grid-template-columns: 1fr;
}
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>English → Kabyle</h1>
<p>Choose the translation that suits you best</p>
</header>
<div class="input-card">
<form method="POST" id="translateForm">
<div class="input-group">
<textarea
name="text"
id="inputText"
placeholder="Enter English text to translate..."
required
autocomplete="off"
>{{ request.form.get('text', '') }}</textarea>
<button type="submit" class="btn-primary" id="submitBtn">
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<line x1="22" y1="2" x2="11" y2="13"></line>
<polygon points="22 2 15 22 11 13 2 9 22 2"></polygon>
</svg>
<span>Translate</span>
</button>
</div>
</form>
</div>
{% if source_text %}
<div class="source-display">
<div class="source-label">Source Text</div>
<div class="source-text">{{ source_text }}</div>
</div>
<div class="results-container">
<!-- MarianMT Results -->
<div class="engine-card">
<div class="engine-header">
<div class="engine-title">
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" style="color: var(--accent-primary)">
<rect x="4" y="4" width="16" height="16" rx="2"></rect>
<rect x="9" y="9" width="6" height="6"></rect>
</svg>
MarianMT (Neural)
</div>
<span class="engine-badge">{{ marian|length }} options</span>
</div>
<ul class="translation-list">
{% for trans in marian %}
<li class="translation-item" onclick='selectTranslation(this, {{ trans | tojson }})' ondblclick='copyText({{ trans | tojson }})'>
<span class="translation-text">{{ trans }}</span>
<svg class="check-icon" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3">
<polyline points="20 6 9 17 4 12"></polyline>
</svg>
<svg class="copy-icon" width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<rect x="9" y="9" width="13" height="13" rx="2"></rect>
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path>
</svg>
</li>
{% endfor %}
</ul>
</div>
<!-- LibreTranslate Results -->
<div class="engine-card">
<div class="engine-header">
<div class="engine-title">
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" style="color: var(--accent-success)">
<circle cx="12" cy="12" r="10"></circle>
<line x1="2" y1="12" x2="22" y2="12"></line>
<path d="M12 2a15.3 15.3 0 0 1 4 10 15.3 15.3 0 0 1-4 10 15.3 15.3 0 0 1-4-10 15.3 15.3 0 0 1 4-10z"></path>
</svg>
LibreTranslate Variants
</div>
<span class="engine-badge">{{ libre|length }} variants</span>
</div>
<div class="variant-grid">
{% for variant_name, variant_data in libre.items() %}
{% if variant_data.success %}
<div class="variant-item" onclick='selectVariant(this, {{ variant_data.text | tojson }})' ondblclick='copyText({{ variant_data.text | tojson }})'>
<div class="variant-header">
<span class="variant-name">{{ variant_name }}</span>
<span class="variant-code">{{ variant_data.code }}</span>
</div>
<div class="variant-text">{{ variant_data.text }}</div>
<svg class="check-icon" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" style="position: absolute; top: 16px; right: 16px;">
<polyline points="20 6 9 17 4 12"></polyline>
</svg>
</div>
{% else %}
<div class="variant-item error">
<div class="variant-header">
<span class="variant-name">{{ variant_name }}</span>
<span class="variant-code">{{ variant_data.code }}</span>
</div>
<div class="variant-text">{{ variant_data.text }}</div>
</div>
{% endif %}
{% endfor %}
</div>
</div>
</div>
{% else %}
<div class="empty-state">
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
<path d="M12 2L2 7l10 5 10-5-10-5zM2 17l10 5 10-5M2 12l10 5 10-5"/>
</svg>
<p>Enter text above and click Translate to see results</p>
</div>
{% endif %}
</div>
<div class="toast" id="toast">Copied to clipboard!</div>
<script>
let selectedText = '';
function selectTranslation(element, text) {
element.parentElement.querySelectorAll('.translation-item').forEach(item => {
item.classList.remove('selected');
});
element.classList.add('selected');
selectedText = text;
copyText(text, false);
}
function selectVariant(element, text) {
document.querySelectorAll('.variant-item').forEach(item => {
item.classList.remove('selected');
});
element.classList.add('selected');
selectedText = text;
copyText(text, false);
}
async function copyText(text, showToast = true) {
try {
await navigator.clipboard.writeText(text);
if (showToast) {
showToastMessage('Copied to clipboard!');
}
} catch (err) {
const textArea = document.createElement('textarea');
textArea.value = text;
document.body.appendChild(textArea);
textArea.select();
document.execCommand('copy');
document.body.removeChild(textArea);
if (showToast) {
showToastMessage('Copied to clipboard!');
}
}
}
function showToastMessage(message) {
const toast = document.getElementById('toast');
toast.textContent = message;
toast.classList.add('show');
setTimeout(() => toast.classList.remove('show'), 2000);
}
document.getElementById('translateForm').addEventListener('submit', function() {
const btn = document.getElementById('submitBtn');
btn.disabled = true;
btn.innerHTML = '<span class="loading"></span> Translating...';
});
</script>
</body>
</html>
"""
# Flask application object; the @app.route decorators in this module
# register their view functions on it.
app = Flask(__name__)
@app.route("/", methods=["GET", "POST"])
def index():
    """Render the translation page.

    GET renders the page with empty results. POST reads the ``text`` form
    field, strips it and, when non-empty, runs the MarianMT translation and
    all LibreTranslate variants. Each LibreTranslate result additionally
    gets its language code under ``"code"`` before the template is rendered.
    """
    context = {"marian": [], "libre": {}, "source_text": ""}

    if request.method == "POST":
        submitted = request.form.get("text", "").strip()
        context["source_text"] = submitted
        if submitted:
            context["marian"] = translate_marian(submitted)
            variant_results = translate_libre_all_variants(submitted)
            for label, outcome in variant_results.items():
                # The template shows the variant code next to its label.
                outcome['code'] = KABYLE_VARIANTS[label]
                context["libre"][label] = outcome

    return render_template_string(HTML_TEMPLATE, **context)
@app.route("/health")
def health():
    """Return a JSON health report, including whether the model is cached yet."""
    report = {
        "status": "healthy",
        # True once the module-level ``model`` cache has been populated.
        "model_loaded": model is not None,
    }
    return jsonify(report)
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 7860.
    app.run(
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
        debug=False,
    )