Spaces:

BugZoid
/

text-humanizer

Running

App Files Community

BugZoid committed on Jan 11, 2025

Commit

ee25ef1

verified ·

1 Parent(s): 21bb05d

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -12

app.py CHANGED Viewed

@@ -18,6 +18,16 @@ if 'models_loaded' not in st.session_state:
     st.session_state.models_loaded = True
 def clean_generated_text(text):
     """
     Remove comandos e limpa o texto gerado
@@ -52,11 +62,13 @@ def humanize_text(text):
     """
     Humanize the input text using T5 model with improved coherence
     """
     # Prepara o texto com contexto específico para melhor coerência
     context = (
         f"Contexto: Este é um texto técnico ou formal que precisa ser reescrito "
-        f"de forma mais natural, mantendo todas as informações importantes. "
-        f"Texto original: {text}"
     )
     input_ids = st.session_state.t5_tokenizer(
@@ -69,23 +81,26 @@ def humanize_text(text):
     outputs = st.session_state.t5_model.generate(
         input_ids,
         max_length=1024,
-        min_length=len(text.split()),  # Mantém tamanho mínimo
         do_sample=True,
         temperature=0.7,  # Ajustado para melhor equilíbrio
         top_p=0.9,
         num_beams=4,
         no_repeat_ngram_size=2,
         repetition_penalty=1.5,
-        length_penalty=1.2
     )
     result = st.session_state.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return clean_generated_text(result)
-def paraphrase_text(text):
     """
-    Refina o texto humanizado mantendo a coerência
     """
     inputs = st.session_state.paraphrase_tokenizer.encode(
         text,
         return_tensors="pt",
@@ -96,16 +111,17 @@ def paraphrase_text(text):
     outputs = st.session_state.paraphrase_model.generate(
         inputs,
         max_length=1024,
-        min_length=len(text.split()),
         do_sample=True,
         temperature=0.3,  # Reduzido para maior coerência
         top_p=0.95,
         repetition_penalty=1.2,
-        length_penalty=1.2
     )
     result = st.session_state.paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return clean_generated_text(result)
 # UI Components
 st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
@@ -113,7 +129,8 @@ st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
 st.title("🤖 → 🧑 Humanizador de Texto Avançado")
 st.markdown("""
 Este aplicativo transforma textos robotizados em linguagem mais natural e humana,
-mantendo todas as informações originais.
 """)
 # Input area with expanded capabilities
@@ -145,7 +162,7 @@ if st.button("Humanizar", type="primary"):
                 # Optional paraphrasing pass
                 if use_paraphrase:
-                    final_text = paraphrase_text(humanized_text)
                 else:
                     final_text = humanized_text

     st.session_state.models_loaded = True
def ensure_minimum_length(text, original_text):
    """
    Ensure the generated text has at least as many words as the original.

    Length is measured in whitespace-separated words. When ``text`` is
    shorter than ``original_text``, the shortfall is made up by appending
    the last missing words of ``original_text``.

    Args:
        text: The generated text to pad if it is too short.
        original_text: The reference text that sets the minimum word count.

    Returns:
        ``text`` unchanged when it is already long enough; otherwise ``text``
        followed by a space and the trailing words of ``original_text``.
    """
    original_words = original_text.split()
    missing_words = len(original_words) - len(text.split())
    if missing_words <= 0:
        return text

    # Pad with whole words, not characters: slicing the raw string would
    # append word fragments and, if the original ends in whitespace, would
    # never increase the word count.
    padding = " ".join(original_words[-missing_words:])
    return f"{text} {padding}" if text else padding
 def clean_generated_text(text):
     """
     Remove comandos e limpa o texto gerado
     """
     Humanize the input text using T5 model with improved coherence
     """
+    min_length = len(text.split())
     # Prepara o texto com contexto específico para melhor coerência
     context = (
         f"Contexto: Este é um texto técnico ou formal que precisa ser reescrito "
+        f"de forma mais natural, mantendo todas as informações importantes e expandindo "
+        f"com detalhes relevantes. Texto original: {text}"
     )
     input_ids = st.session_state.t5_tokenizer(
     outputs = st.session_state.t5_model.generate(
         input_ids,
         max_length=1024,
+        min_length=min_length,  # Força o tamanho mínimo igual ao original
         do_sample=True,
         temperature=0.7,  # Ajustado para melhor equilíbrio
         top_p=0.9,
         num_beams=4,
         no_repeat_ngram_size=2,
         repetition_penalty=1.5,
+        length_penalty=2.0  # Aumentado para favorecer textos mais longos
     )
     result = st.session_state.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    result = clean_generated_text(result)
+    return ensure_minimum_length(result, text)
+def paraphrase_text(text, original_text):
     """
+    Refina o texto humanizado mantendo a coerência e tamanho
     """
+    min_length = len(original_text.split())
     inputs = st.session_state.paraphrase_tokenizer.encode(
         text,
         return_tensors="pt",
     outputs = st.session_state.paraphrase_model.generate(
         inputs,
         max_length=1024,
+        min_length=min_length,  # Força o tamanho mínimo igual ao original
         do_sample=True,
         temperature=0.3,  # Reduzido para maior coerência
         top_p=0.95,
         repetition_penalty=1.2,
+        length_penalty=2.0  # Aumentado para favorecer textos mais longos
     )
     result = st.session_state.paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    result = clean_generated_text(result)
+    return ensure_minimum_length(result, original_text)
 # UI Components
 st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
 st.title("🤖 → 🧑 Humanizador de Texto Avançado")
 st.markdown("""
 Este aplicativo transforma textos robotizados em linguagem mais natural e humana,
+mantendo todas as informações originais e garantindo que o texto final seja pelo menos
+do mesmo tamanho que o original.
 """)
 # Input area with expanded capabilities
                 # Optional paraphrasing pass
                 if use_paraphrase:
+                    final_text = paraphrase_text(humanized_text, input_text)
                 else:
                     final_text = humanized_text