Spaces:

tx3bas
/

Traduceme

Sleeping

tx3bas committed on Mar 5, 2025

Commit

82ada27

verified ·

1 Parent(s): ff7deb2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 from mtranslate import translate
 import re
 import json
 # Diccionario de idiomas (sin cambios)
 lang_dict = {
@@ -29,32 +30,35 @@ def split_text(text, limit=4000):
     return chunks
 def translate_html_content(text, source_lang, target_lang):
     """Translate the text between HTML tags, leaving the tags untouched.

     The input is cut at every match of ``<[^>]+>``. Each non-blank text
     segment is split with ``split_text``, every chunk is sent to
     ``translate`` and the results are joined back together; blank segments
     are kept verbatim. The output interleaves the processed segments with the
     original tags in their original order.

     Args:
         text: HTML (or plain text) to translate.
         source_lang: Language code forwarded to ``translate`` as the source.
         target_lang: Language code forwarded to ``translate`` as the target.

     Returns:
         The reassembled string with translated text segments.
     """
     tag_re = re.compile(r'<[^>]+>')
     markup = tag_re.findall(text)
     segments = tag_re.split(text)

     def _render(segment):
         # Empty or whitespace-only segments are passed through unchanged.
         if not segment.strip():
             return segment
         joined = ''.join(
             [translate(piece, target_lang, source_lang) for piece in split_text(segment)]
         )
         # Strip unwanted escape sequences from the translated text.
         return joined.replace('\\"', '"').replace('\\n', '\n').replace('\\\\', '\\')

     rendered = [_render(segment) for segment in segments]

     # Stitch segments and tags back together: segment, tag, segment, tag, ...
     pieces = []
     for position, segment in enumerate(rendered):
         pieces.append(segment)
         if position < len(markup):
             pieces.append(markup[position])
     return ''.join(pieces)

 from mtranslate import translate
 import re
 import json
+import uuid
 # Diccionario de idiomas (sin cambios)
 lang_dict = {
     return chunks
 def translate_html_content(text, source_lang, target_lang):
     """Translate an HTML fragment while keeping its tags intact.

     Every match of ``<[^>]+>`` is swapped for a ``{uuid4}`` placeholder
     (identical tags share one placeholder). The resulting text is split with
     ``split_text``, each chunk is passed to ``translate``, and the
     placeholders found in the joined result are swapped back for the original
     tags.

     Args:
         text: HTML (or plain text) to translate.
         source_lang: Language code forwarded to ``translate`` as the source.
         target_lang: Language code forwarded to ``translate`` as the target.

     Returns:
         The translated text with the original tags restored. A placeholder
         that does not come back from ``translate`` unchanged cannot be
         matched and is left in the output as is.
     """
     html_tag_pattern = re.compile(r'<[^>]+>')
     # Shape of the placeholders generated below: "{" + str(uuid.uuid4()) + "}".
     marker_pattern = re.compile(
         r'\{[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12}\}'
     )

     markers = {}        # placeholder -> original tag
     marker_by_tag = {}  # original tag -> placeholder

     def _hide_tag(match):
         # Reuse the placeholder of a tag that was already seen so that no
         # unused placeholders are generated.
         tag = match.group(0)
         marker = marker_by_tag.get(tag)
         if marker is None:
             marker = f"{{{uuid.uuid4()}}}"
             marker_by_tag[tag] = marker
             markers[marker] = tag
         return marker

     # Single pass over the text instead of one full str.replace scan per tag.
     text = html_tag_pattern.sub(_hide_tag, text)

     # NOTE(review): split_text is defined elsewhere in this file; confirm it
     # never cuts a placeholder in half, otherwise that tag cannot be restored.
     translated_chunks = []
     for chunk in split_text(text):
         translated_chunk = translate(chunk, target_lang, source_lang)
         # Strip unwanted escape sequences from the translated text.
         translated_chunk = translated_chunk.replace('\\"', '"').replace('\\n', '\n').replace('\\\\', '\\')
         translated_chunks.append(translated_chunk)
     translated_text = ''.join(translated_chunks)

     # Restore all tags in one pass; placeholder-shaped text that was not
     # generated above is left untouched.
     return marker_pattern.sub(
         lambda match: markers.get(match.group(0), match.group(0)),
         translated_text,
     )