Spaces: BugZoid/text-humanizer

Running

App Files Community

BugZoid committed on Jan 11, 2025

Commit

bf2a95e

verified ·

1 Parent(s): ee25ef1

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -127

app.py CHANGED Viewed

@@ -1,127 +1,72 @@
 import streamlit as st
-from transformers import (
-    AutoTokenizer,
-    AutoModelForSeq2SeqLM,
-    T5ForConditionalGeneration,
-    T5Tokenizer
-)
-# Initialize session state for models if not already done
-if 'models_loaded' not in st.session_state:
-    # Load the main T5 model and tokenizer (using t5-base for better quality)
-    st.session_state.t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
-    st.session_state.t5_model = T5ForConditionalGeneration.from_pretrained("t5-base")
-    # Load the paraphrasing model and tokenizer
-    st.session_state.paraphrase_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
-    st.session_state.paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
-    st.session_state.models_loaded = True
-def ensure_minimum_length(text, original_text):
-    """
-    Garante que o texto gerado tenha pelo menos o mesmo tamanho do original
-    """
-    while len(text.split()) < len(original_text.split()):
-        missing_words = len(original_text.split()) - len(text.split())
-        if missing_words > 0:
-            text = text + " " + original_text[-missing_words:]
-    return text
-def clean_generated_text(text):
-    """
-    Remove comandos e limpa o texto gerado
-    """
     # Lista de prefixos de comando para remover
-    command_prefixes = [
         "reescreva o seguinte texto",
         "reescreva este texto",
         "reescreva o texto",
-        "traduza o seguinte texto",
-        "traduza este texto",
-        "traduza o texto",
         "humanize:",
         "humanizar:",
-        "em português de forma mais natural e humana",
-        "de forma mais natural e humana"
     ]
-    # Remove os prefixos de comando
-    cleaned_text = text.lower()
-    for prefix in command_prefixes:
-        if cleaned_text.startswith(prefix.lower()):
-            cleaned_text = cleaned_text[len(prefix):].strip()
     # Capitaliza a primeira letra
-    if cleaned_text:
-        cleaned_text = cleaned_text[0].upper() + cleaned_text[1:]
-    return cleaned_text
-def humanize_text(text):
-    """
-    Humanize the input text using T5 model with improved coherence
-    """
-    min_length = len(text.split())
-    # Prepara o texto com contexto específico para melhor coerência
-    context = (
-        f"Contexto: Este é um texto técnico ou formal que precisa ser reescrito "
-        f"de forma mais natural, mantendo todas as informações importantes e expandindo "
-        f"com detalhes relevantes. Texto original: {text}"
-    )
-    input_ids = st.session_state.t5_tokenizer(
-        context,
         return_tensors="pt",
-        max_length=1024,
         truncation=True
     ).input_ids
-    outputs = st.session_state.t5_model.generate(
         input_ids,
-        max_length=1024,
-        min_length=min_length,  # Força o tamanho mínimo igual ao original
         do_sample=True,
-        temperature=0.7,  # Ajustado para melhor equilíbrio
-        top_p=0.9,
-        num_beams=4,
-        no_repeat_ngram_size=2,
-        repetition_penalty=1.5,
-        length_penalty=2.0  # Aumentado para favorecer textos mais longos
     )
-    result = st.session_state.t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
     result = clean_generated_text(result)
-    return ensure_minimum_length(result, text)
-def paraphrase_text(text, original_text):
-    """
-    Refina o texto humanizado mantendo a coerência e tamanho
-    """
-    min_length = len(original_text.split())
-    inputs = st.session_state.paraphrase_tokenizer.encode(
-        text,
-        return_tensors="pt",
-        max_length=1024,
-        truncation=True
-    )
-    outputs = st.session_state.paraphrase_model.generate(
-        inputs,
-        max_length=1024,
-        min_length=min_length,  # Força o tamanho mínimo igual ao original
-        do_sample=True,
-        temperature=0.3,  # Reduzido para maior coerência
-        top_p=0.95,
-        repetition_penalty=1.2,
-        length_penalty=2.0  # Aumentado para favorecer textos mais longos
-    )
-    result = st.session_state.paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    result = clean_generated_text(result)
-    return ensure_minimum_length(result, original_text)
 # UI Components
 st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
@@ -140,45 +85,31 @@ input_text = st.text_area(
     help="Cole seu texto aqui para transformá-lo em uma versão mais natural e humana."
 )
-# Advanced settings in sidebar
-with st.sidebar:
-    st.header("Configurações Avançadas")
-    use_paraphrase = st.checkbox("Ativar Paráfrase", value=True)
-    show_original = st.checkbox("Mostrar Texto Original", value=False)
-    if input_text:
-        st.write("Informações do texto:")
-        st.write(f"Palavras no original: {len(input_text.split())}")
-# Process button with error handling
 if st.button("Humanizar", type="primary"):
     if not input_text:
-        st.warning("⚠️ Por favor, cole um texto de robô primeiro!")
     else:
         with st.spinner("Processando o texto..."):
             try:
-                # First humanization pass
-                humanized_text = humanize_text(input_text)
-                # Optional paraphrasing pass
-                if use_paraphrase:
-                    final_text = paraphrase_text(humanized_text, input_text)
-                else:
-                    final_text = humanized_text
                 # Display results
                 st.success("✨ Texto humanizado:")
-                if show_original:
-                    st.text("Texto original:")
                     st.info(input_text)
-                    st.write(f"Palavras no original: {len(input_text.split())}")
-                st.markdown("**Resultado:**")
-                st.write(final_text)
-                st.write(f"Palavras no resultado: {len(final_text.split())}")
             except Exception as e:
-                st.error(f"❌ Ocorreu um erro durante o processamento: {str(e)}")
 # Footer
 st.markdown("---")
 st.markdown(

 import streamlit as st
+from transformers import T5ForConditionalGeneration, T5Tokenizer
+# Initialize session state for model if not already done
+if 'model_loaded' not in st.session_state:
+    st.session_state.tokenizer = T5Tokenizer.from_pretrained("t5-base")
+    st.session_state.model = T5ForConditionalGeneration.from_pretrained("t5-base")
+    st.session_state.model_loaded = True
+    def clean_generated_text(text):
+    """Remove comandos e limpa o texto gerado"""
+    text = text.strip()
     # Lista de prefixos de comando para remover
+    prefixes = [
         "reescreva o seguinte texto",
         "reescreva este texto",
         "reescreva o texto",
+        "traduza",
         "humanize:",
         "humanizar:",
+        "em português",
+        "de forma mais natural"
     ]
+# Remove os prefixos de comando
+    text_lower = text.lower()
+    for prefix in prefixes:
+        if text_lower.startswith(prefix):
+            text = text[len(prefix):].strip()
+            text_lower = text.lower()
     # Capitaliza a primeira letra
+    if text:
+        text = text[0].upper() + text[1:]
+    return text
+    def humanize_text(text):
+    """Humaniza o texto mantendo coerência e tamanho"""
+    prompt = f"reescreva em português natural, mantendo todas as informações: {text}"
+    input_ids = st.session_state.tokenizer(
+        prompt,
         return_tensors="pt",
+        max_length=512,
         truncation=True
     ).input_ids
+    # Parâmetros ajustados para melhor coerência
+    outputs = st.session_state.model.generate(
         input_ids,
+        max_length=1024,  # 512
+        min_length=len(text.split()), # min_length=min_length,
         do_sample=True,
+        temperature=0.3,      # Reduzido para maior coerência
+        top_p=0.95,          # Ajustado para melhor seleção de palavras
+        num_beams=3,         # Reduzido para maior velocidade
+        repetition_penalty=1.2,
+        length_penalty=2.0    # Mantém incentivo para textos mais longos
     )
+    result = st.session_state.tokenizer.decode(outputs[0], skip_special_tokens=True)
     result = clean_generated_text(result)
+    # Garante tamanho mínimo
+    while len(result.split()) < len(text.split()):
+        result += " " + " ".join(text.split()[-(len(text.split()) - len(result.split())):])
+    return result
 # UI Components
 st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
     help="Cole seu texto aqui para transformá-lo em uma versão mais natural e humana."
 )
+# Process button
 if st.button("Humanizar", type="primary"):
     if not input_text:
+        st.warning("⚠️ Por favor, cole um texto primeiro!")
     else:
         with st.spinner("Processando o texto..."):
             try:
+                final_text = humanize_text(input_text)
                 # Display results
                 st.success("✨ Texto humanizado:")
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.text("Original:")
                     st.info(input_text)
+                    st.write(f"Palavras: {len(input_text.split())}")
+                with col2:
+                    st.text("Resultado:")
+                    st.info(final_text)
+                    st.write(f"Palavras: {len(final_text.split())}")
             except Exception as e:
+                st.error(f"❌ Erro no processamento: {str(e)}")
 # Footer
 st.markdown("---")
 st.markdown(