Spaces:

BugZoid
/

text-humanizer

Running

App Files Community

BugZoid committed on Jan 11, 2025

Commit

21bb05d

verified ·

1 Parent(s): 223938e

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -47

app.py CHANGED Viewed

@@ -18,76 +18,94 @@ if 'models_loaded' not in st.session_state:
     st.session_state.models_loaded = True
-def ensure_minimum_length(text, original_text):
     """
-    Garante que o texto gerado tenha pelo menos o mesmo tamanho do original
     """
-    while len(text.split()) < len(original_text.split()):
-        missing_words = len(original_text.split()) - len(text.split())
-        if missing_words > 0:
-            text = text + " " + original_text[-missing_words:]
-    return text
-def paraphrase_text(text, original_text):
     """
-    Apply paraphrasing to the input text using BART model
     """
-    min_length = len(original_text.split())
-    inputs = st.session_state.paraphrase_tokenizer.encode(
-        text,
         return_tensors="pt",
         max_length=1024,
         truncation=True
-    )
-    outputs = st.session_state.paraphrase_model.generate(
-        inputs,
         max_length=1024,
-        min_length=min_length,  # Força o tamanho mínimo igual ao original
         do_sample=True,
-        temperature=0.3,
-        top_p=0.95,
-        repetition_penalty=1.2,
-        length_penalty=2.0  # Aumentado para favorecer textos mais longos
     )
-    result = st.session_state.paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return ensure_minimum_length(result, original_text)
-def humanize_text(text):
     """
-    Humanize the input text using T5 model
     """
-    min_length = len(text.split())
-    prompt = (
-        f"reescreva o seguinte texto em português de forma mais natural e humana, "
-        f"mantendo todas as informações e expandindo com detalhes relevantes: {text}"
-    )
-    input_ids = st.session_state.t5_tokenizer(
-        prompt,
         return_tensors="pt",
         max_length=1024,
         truncation=True
-    ).input_ids
-    outputs = st.session_state.t5_model.generate(
-        input_ids,
         max_length=1024,
-        min_length=min_length,  # Força o tamanho mínimo igual ao original
         do_sample=True,
-        temperature=0.3,
         top_p=0.95,
-        num_beams=5,
-        no_repeat_ngram_size=3,
         repetition_penalty=1.2,
-        length_penalty=2.0  # Aumentado para favorecer textos mais longos
     )
-    result = st.session_state.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return ensure_minimum_length(result, text)
 # UI Components
 st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
@@ -95,8 +113,7 @@ st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
 st.title("🤖 → 🧑 Humanizador de Texto Avançado")
 st.markdown("""
 Este aplicativo transforma textos robotizados em linguagem mais natural e humana,
-mantendo todas as informações originais e garantindo que o texto final seja pelo menos
-do mesmo tamanho que o original.
 """)
 # Input area with expanded capabilities
@@ -112,7 +129,6 @@ with st.sidebar:
     use_paraphrase = st.checkbox("Ativar Paráfrase", value=True)
     show_original = st.checkbox("Mostrar Texto Original", value=False)
-    # Adicionar informações sobre o texto
     if input_text:
         st.write("Informações do texto:")
         st.write(f"Palavras no original: {len(input_text.split())}")
@@ -129,7 +145,7 @@ if st.button("Humanizar", type="primary"):
                 # Optional paraphrasing pass
                 if use_paraphrase:
-                    final_text = paraphrase_text(humanized_text, input_text)
                 else:
                     final_text = humanized_text

     st.session_state.models_loaded = True
+def clean_generated_text(text):
     """
+    Remove comandos e limpa o texto gerado
     """
+    # Lista de prefixos de comando para remover
+    command_prefixes = [
+        "reescreva o seguinte texto",
+        "reescreva este texto",
+        "reescreva o texto",
+        "traduza o seguinte texto",
+        "traduza este texto",
+        "traduza o texto",
+        "humanize:",
+        "humanizar:",
+        "em português de forma mais natural e humana",
+        "de forma mais natural e humana"
+    ]
+    # Remove os prefixos de comando
+    cleaned_text = text.lower()
+    for prefix in command_prefixes:
+        if cleaned_text.startswith(prefix.lower()):
+            cleaned_text = cleaned_text[len(prefix):].strip()
+    # Capitaliza a primeira letra
+    if cleaned_text:
+        cleaned_text = cleaned_text[0].upper() + cleaned_text[1:]
+    return cleaned_text
+def humanize_text(text):
     """
+    Humanize the input text using T5 model with improved coherence
     """
+    # Prepara o texto com contexto específico para melhor coerência
+    context = (
+        f"Contexto: Este é um texto técnico ou formal que precisa ser reescrito "
+        f"de forma mais natural, mantendo todas as informações importantes. "
+        f"Texto original: {text}"
+    )
+    input_ids = st.session_state.t5_tokenizer(
+        context,
         return_tensors="pt",
         max_length=1024,
         truncation=True
+    ).input_ids
+    outputs = st.session_state.t5_model.generate(
+        input_ids,
         max_length=1024,
+        min_length=len(text.split()),  # Mantém tamanho mínimo
         do_sample=True,
+        temperature=0.7,  # Ajustado para melhor equilíbrio
+        top_p=0.9,
+        num_beams=4,
+        no_repeat_ngram_size=2,
+        repetition_penalty=1.5,
+        length_penalty=1.2
     )
+    result = st.session_state.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return clean_generated_text(result)
+def paraphrase_text(text):
     """
+    Refina o texto humanizado mantendo a coerência
     """
+    inputs = st.session_state.paraphrase_tokenizer.encode(
+        text,
         return_tensors="pt",
         max_length=1024,
         truncation=True
+    )
+    outputs = st.session_state.paraphrase_model.generate(
+        inputs,
         max_length=1024,
+        min_length=len(text.split()),
         do_sample=True,
+        temperature=0.3,  # Reduzido para maior coerência
         top_p=0.95,
         repetition_penalty=1.2,
+        length_penalty=1.2
     )
+    result = st.session_state.paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return clean_generated_text(result)
 # UI Components
 st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
 st.title("🤖 → 🧑 Humanizador de Texto Avançado")
 st.markdown("""
 Este aplicativo transforma textos robotizados em linguagem mais natural e humana,
+mantendo todas as informações originais.
 """)
 # Input area with expanded capabilities
     use_paraphrase = st.checkbox("Ativar Paráfrase", value=True)
     show_original = st.checkbox("Mostrar Texto Original", value=False)
     if input_text:
         st.write("Informações do texto:")
         st.write(f"Palavras no original: {len(input_text.split())}")
                 # Optional paraphrasing pass
                 if use_paraphrase:
+                    final_text = paraphrase_text(humanized_text)
                 else:
                     final_text = humanized_text