Spaces:

tx3bas
/

Traduceme

Sleeping

App Files Files Community

tx3bas committed on Mar 5, 2025

Commit

3c3ebfd

verified ·

1 Parent(s): 8017c3e

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -93

app.py CHANGED Viewed

@@ -1,74 +1,49 @@
 import gradio as gr
 from mtranslate import translate
 import re
 # Diccionario de idiomas y sus códigos
 lang_dict = {
     'Automático': 'auto',
-    'Español': 'es',
-    'English': 'en',
-    'Mandarín': 'zh',
-    'Hindi': 'hi',
-    'Árabe': 'ar',
-    'Portugués': 'pt',
-    'Bengalí': 'bn',
-    'Ruso': 'ru',
-    'Japonés': 'ja',
-    'Panyabí': 'pa',
-    'Alemán': 'de',
-    'Javanés': 'jw',
-    'Coreano': 'ko',
-    'Francés': 'fr',
-    'Vietnamita': 'vi',
-    'Turco': 'tr',
-    'Italiano': 'it',
-    'Ucraniano': 'uk',
-    'Tailandés': 'th',
-    'Guyaratí': 'gu',
-    'Polaco': 'pl',
-    'Griego': 'el',
-    'Neerlandés': 'nl',
-    'Sueco': 'sv',
-    'Rumano': 'ro',
-    'Checo': 'cs',
-    'Húngaro': 'hu',
-    'Hebreo': 'he',
-    'Indonesio': 'id',
-    'Nepalí': 'ne',
-    'Gallego': 'gl',
-    'Catalán': 'ca',
     'Vasco': 'eu'
 }
 lang_list = list(lang_dict.keys())
def split_html_content(text):
    """Split *text* into HTML pieces and translatable text pieces.

    Args:
        text: Input string that may contain HTML markup.

    Returns:
        A list of ``(kind, value)`` tuples in document order, where ``kind``
        is ``'html'`` (emit unchanged) or ``'text'`` (to be translated).
        Text values are stripped of surrounding whitespace.

    NOTE(review): this is a regex approximation, not an HTML parser. For an
    element that contains nested tags, the inner markup ends up inside the
    'text' piece.
    """
    # Either a tag (optionally followed by content up to the next closing
    # tag) or a run of text outside any tag.
    pattern = r'(<[^>]+(?:>.*?</[^>]+>|>))|([^<]+)'
    parts = []
    for match in re.finditer(pattern, text, re.DOTALL):
        element = match.group(1)
        if element:
            # Look for inner content between the opening '>' and the next '</'.
            content_match = re.search(r'>(.*?)</', element, re.DOTALL)
            inner = content_match.group(1).strip() if content_match else ''
            if inner:
                # Slice exactly at the group boundaries. The previous
                # "start + 1" pulled the first content character into the
                # opening tag, so it appeared twice in the output.
                parts.append(('html', element[:content_match.start(1)]))
                parts.append(('text', inner))
                parts.append(('html', element[content_match.end(1):]))
            else:
                parts.append(('html', element))
        elif match.group(2):
            # Text outside of any tag.
            parts.append(('text', match.group(2).strip()))
    return parts
 def split_text(text, limit=4000):
-    """Divide texto en fragmentos respetando el límite"""
     sentences = re.split(r'([;.])', text)
     chunks = []
     chunk = ''
@@ -82,55 +57,111 @@ def split_text(text, limit=4000):
         chunks.append(chunk)
     return chunks
 def translate_text(source_lang, target_lang, text):
-    """Traduce contenido manteniendo etiquetas HTML intactas"""
     source_code = lang_dict[source_lang]
     target_code = lang_dict[target_lang]
-    # Separar HTML y contenido
-    parts = split_html_content(text)
-    # Lista de términos que no deben traducirse (nombres propios, marcas, etc.)
-    preserve_terms = {'DGM Services', 'Alexandru George Bratosin', 'X2596200v', 'info@dgm-services.com',
-                      'https://dgm-services.com/', 'Calle Federico García Lorca', '722 17 99 13'}
-    # Traducir solo las partes de texto
-    translated_parts = []
-    for part_type, content in parts:
-        if part_type == 'text' and content:
-            # Verificar si el contenido está en la lista de términos a preservar
-            if content in preserve_terms:
-                translated_parts.append(content)
-            else:
-                # Dividir en fragmentos si es necesario
-                chunks = split_text(content)
-                translated_chunks = [
-                    translate(chunk, target_code, source_code)
-                    for chunk in chunks
-                ]
-                translated_parts.append(''.join(translated_chunks))
         else:
-            # Mantener etiquetas HTML sin cambios
-            translated_parts.append(content)
-    return ''.join(translated_parts)
def main(Texto, source_lang, target_lang):
    """Interface callback: return *Texto* translated from source to target language."""
    return translate_text(source_lang, target_lang, Texto)
 iface = gr.Interface(
     fn=main,
     inputs=[
-        gr.Textbox(label="Texto a traducir", lines=5),
-        gr.Dropdown(lang_list, label="Idioma origen", value="Automático"),
-        gr.Dropdown(lang_list, label="Idioma destino", value="Español")
     ],
-    outputs="text",  # Salida como texto plano
-    title="<div style='margin:0 auto;text-align:center'><div style='margin:0 auto;text-align:center'><img style='width:100px;display: inline-table;margin-bottom:-10px' src='https://artxeweb.com/media/files/idioma.jpg'><p>Traducción sin límites</p></div>",
-    description="<p style='margin-bottom:10px;text-align:center;background: #ffffff; padding: 8px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb;'>Ingresa el texto que deseas traducir, selecciona el idioma origen (o deja 'Automático') y el idioma de destino. ¡No hay límites!</p>",
-    article="<div style='margin-top:10px'><p style='text-align: center !important; background: #ffffff; padding: 5px 30px; border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>Desarrollada por <a style='text-decoration: none !important; color: #e12a31 !important;' title='Artxe Web' href='https://artxeweb.com'>© Artxe Web</a></p></div>"
 )
 iface.launch()

 import gradio as gr
 from mtranslate import translate
 import re
+import html
 # Diccionario de idiomas y sus códigos
 lang_dict = {
     'Automático': 'auto',
+    'Español': 'es',
+    'English': 'en',
+    'Mandarín': 'zh',
+    'Hindi': 'hi',
+    'Árabe': 'ar',
+    'Portugués': 'pt',
+    'Bengalí': 'bn',
+    'Ruso': 'ru',
+    'Japonés': 'ja',
+    'Panyabí': 'pa',
+    'Alemán': 'de',
+    'Javanés': 'jw',
+    'Coreano': 'ko',
+    'Francés': 'fr',
+    'Vietnamita': 'vi',
+    'Turco': 'tr',
+    'Italiano': 'it',
+    'Ucraniano': 'uk',
+    'Tailandés': 'th',
+    'Guyaratí': 'gu',
+    'Polaco': 'pl',
+    'Griego': 'el',
+    'Neerlandés': 'nl',
+    'Sueco': 'sv',
+    'Rumano': 'ro',
+    'Checo': 'cs',
+    'Húngaro': 'hu',
+    'Hebreo': 'he',
+    'Indonesio': 'id',
+    'Nepalí': 'ne',
+    'Gallego': 'gl',
+    'Catalán': 'ca',
     'Vasco': 'eu'
 }
 lang_list = list(lang_dict.keys())
 def split_text(text, limit=4000):
     sentences = re.split(r'([;.])', text)
     chunks = []
     chunk = ''
         chunks.append(chunk)
     return chunks
def protect_html_tags(text):
    """Replace every ``<...>`` tag in *text* with a numbered placeholder.

    Args:
        text: Input string that may contain HTML tags.

    Returns:
        A ``(protected_text, replacements)`` tuple. ``protected_text`` is
        *text* with the i-th tag (in order of appearance) replaced by
        ``__HTML_TAG_{i}__``. ``replacements`` maps each placeholder to the
        exact tag text it stands for.

    NOTE(review): anything of the form ``<...>`` is treated as a tag, so a
    fragment such as ``a < b and c > d`` is protected as well.
    """
    replacements = {}

    def _stash(match):
        # Number placeholders by order of appearance and remember the tag.
        placeholder = f"__HTML_TAG_{len(replacements)}__"
        replacements[placeholder] = match.group(0)
        return placeholder

    # One pass over the text; the previous version re-scanned the whole
    # string with str.replace() once per tag.
    protected_text = re.sub(r'<[^>]+>', _stash, text)
    return protected_text, replacements
def restore_html_tags(text, replacements):
    """Put the original HTML tags back in place of their placeholders.

    Args:
        text: Text containing placeholder strings.
        replacements: Mapping of placeholder string to original tag text.

    Returns:
        *text* with every occurrence of each placeholder replaced by its
        original tag. Placeholders that do not occur are ignored.
    """
    if not replacements:
        return text
    # Longest keys first, so the alternation never settles for a shorter key
    # that happens to be a prefix of a longer one.
    keys = sorted(replacements, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(key) for key in keys))
    # Single pass: restored tag text is never scanned again. Sequential
    # str.replace() calls could rewrite placeholder-like text that is part of
    # an already restored tag (e.g. inside an attribute value).
    return pattern.sub(lambda match: replacements[match.group(0)], text)
 def translate_text(source_lang, target_lang, text):
+    # Proteger etiquetas HTML
+    protected_text, replacements = protect_html_tags(text)
+    # Obtener códigos de idioma
     source_code = lang_dict[source_lang]
     target_code = lang_dict[target_lang]
+    # Dividir el texto en fragmentos para no superar el límite
+    chunks = split_text(protected_text)
+    # Traducir cada fragmento respetando el idioma de origen
+    translated_chunks = []
+    for chunk in chunks:
+        if source_lang == 'Automático':
+            # Si es automático, no especificamos el idioma de origen
+            translated_chunk = translate(chunk, target_code)
         else:
+            # Si se especificó un idioma de origen, lo usamos
+            translated_chunk = translate(chunk, target_code, source=source_code)
+        translated_chunks.append(translated_chunk)
+    translated_text = ''.join(translated_chunks)
+    # Restaurar etiquetas HTML
+    final_text = restore_html_tags(translated_text, replacements)
+    return final_text
def main(texto, source_lang, target_lang):
    """Interface callback: translate *texto* and return it as an HTML snippet.

    Args:
        texto: Text to translate; may contain HTML tags.
        source_lang: Source language name (key of ``lang_dict``).
        target_lang: Target language name (key of ``lang_dict``).

    Returns:
        An HTML string: either a short notice when the input is empty or no
        target language is selected, or the translation followed by a
        "Copiar" button whose ``onclick`` copies the translation to the
        clipboard.
    """
    import json  # local import: only needed to build the JavaScript literal

    # Nothing to translate (also covers a missing value).
    if not texto or not texto.strip():
        return "<div>Por favor, ingresa un texto para traducir.</div>"
    # Without a target language the code lookup would fail with KeyError.
    if not target_lang:
        return "<div>Por favor, selecciona un idioma de destino.</div>"

    translated_text = translate_text(source_lang, target_lang, texto)

    # Build the handler in two layers: json.dumps() yields a valid JavaScript
    # string literal (quotes, backslashes and newlines escaped), then
    # html.escape() makes the whole handler safe inside a double-quoted
    # attribute. HTML-escaping alone is undone by the browser before the
    # script runs, so quotes in the translation used to break the handler.
    copy_js = (
        f"navigator.clipboard.writeText({json.dumps(translated_text)})"
        '.then(() => alert("Texto copiado al portapapeles"))'
    )
    onclick_attr = html.escape(copy_js, quote=True)

    # NOTE(review): the translation is inserted as raw HTML on purpose so the
    # preserved tags render; this also means user-supplied markup is rendered.
    html_output = f"""
    <div style="position: relative;">
        <div>{translated_text}</div>
        <button
            style="position: absolute; top: 0; right: 0; font-size: small; padding: 5px; background-color: #f0f0f0; color: #333; border: 1px solid #ccc; border-radius: 4px; cursor: pointer;"
            onclick="{onclick_attr}">
            Copiar
        </button>
    </div>
    """
    return html_output
 iface = gr.Interface(
     fn=main,
     inputs=[
+        "text",
+        gr.Dropdown(lang_list, value="Automático", label="Idioma de Origen"),
+        gr.Dropdown(lang_list, label="Idioma de Destino")
     ],
+    outputs="html",
+    title="""<div style='margin:0 auto;text-align:center'>
+        <div style='margin:0 auto;text-align:center'>
+            <img style='width:100px;display: inline-table;margin-bottom:-10px' src='https://artxeweb.com/media/files/idioma.jpg'>
+            <p>Traducción sin límites</p>
+        </div>
+    </div>""",
+    description="""<p style='margin-bottom:10px;text-align:center;background: #ffffff; padding: 8px; border-radius: 8px;
+        border-width: 1px; border: solid 1px #e5e7eb;'>
+        Ingresa el texto que deseas traducir, selecciona el idioma de origen (o déjalo en "Automático")
+        y selecciona el idioma de destino. Las etiquetas HTML serán preservadas. ¡No hay límites!
+    </p>""",
+    article="""<div style='margin-top:10px'>
+        <p style='text-align: center !important; background: #ffffff; padding: 5px 30px;
+        border-radius: 8px; border-width: 1px; border: solid 1px #e5e7eb; width: fit-content; margin: auto;'>
+            Desarrollada por <a style='text-decoration: none !important; color: #e12a31 !important;'
+            title='Artxe Web' href='https://artxeweb.com'>© Artxe Web</a>
+        </p>
+    </div>"""
 )
 iface.launch()