Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -97,14 +97,72 @@ def fine_tune_model():
|
|
| 97 |
|
| 98 |
st.session_state.model.eval()
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
# Initialize session state
|
| 101 |
if 'model_loaded' not in st.session_state:
|
| 102 |
st.session_state.tokenizer = T5Tokenizer.from_pretrained("t5-base")
|
| 103 |
st.session_state.model = T5ForConditionalGeneration.from_pretrained("t5-base")
|
| 104 |
st.session_state.model_loaded = True
|
| 105 |
-
|
| 106 |
-
# Rest of your existing functions (clean_generated_text and humanize_text remain the same)
|
| 107 |
-
[Previous clean_generated_text and humanize_text functions remain unchanged]
|
| 108 |
|
| 109 |
# UI Components
|
| 110 |
st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
|
|
|
|
| 97 |
|
| 98 |
st.session_state.model.eval()
|
| 99 |
|
| 100 |
+
def clean_generated_text(text):
    """Strip echoed command prefixes from model output and tidy it.

    The T5 prompt sometimes gets echoed back at the start of the generated
    text (e.g. "reescreva o texto ...", "humanize: ..."). This removes any
    such leading command phrases and capitalizes the first remaining letter.

    Args:
        text: Raw text produced by the generation model.

    Returns:
        The cleaned text, with command prefixes removed and the first
        character upper-cased. Empty input yields an empty string.
    """
    text = text.strip()

    # Command prefixes (from the Portuguese prompts) the model may echo back.
    prefixes = [
        "reescreva o seguinte texto",
        "reescreva este texto",
        "reescreva o texto",
        "traduza",
        "humanize:",
        "humanizar:",
        "em português",
        "de forma mais natural",
    ]

    # Keep stripping until no prefix matches. The original single ordered
    # pass missed stacked commands: once "humanize:" was removed, an earlier
    # list entry such as "traduza" that became exposed was never re-checked.
    stripped = True
    while stripped:
        stripped = False
        text_lower = text.lower()
        for prefix in prefixes:
            if text_lower.startswith(prefix):
                text = text[len(prefix):].strip()
                stripped = True
                break  # restart the scan from the first prefix

    # Capitalize only the first letter, preserving the rest of the string
    # (str.capitalize would lower-case everything else).
    if text:
        text = text[0].upper() + text[1:]

    return text
|
| 128 |
+
|
| 129 |
+
def humanize_text(text):
    """Rewrite *text* in more natural Portuguese, keeping content and length.

    Builds a rewrite prompt, generates with the T5 model held in Streamlit
    session state, strips any echoed command prefixes via
    clean_generated_text, and finally pads the result with trailing words of
    the original input if the output came out shorter (word-wise).
    """
    prompt = f"reescreva em português natural, mantendo todas as informações: {text}"

    # Tokenize the prompt; anything beyond 1024 tokens is truncated.
    input_ids = st.session_state.tokenizer(
        prompt,
        return_tensors="pt",
        max_length=1024,
        truncation=True
    ).input_ids

    # Generation parameters tuned for coherence.
    # NOTE(review): do_sample=True combined with num_beams=3 mixes sampling
    # with beam search, and min_length counts TOKENS while len(text.split())
    # counts words — presumably intentional approximations; confirm.
    outputs = st.session_state.model.generate(
        input_ids,
        max_length=1024,
        min_length=len(text.split()),
        do_sample=True,
        temperature=0.1,
        top_p=0.95,
        num_beams=3,
        repetition_penalty=1.2,
        length_penalty=2.0
    )
    result = st.session_state.tokenizer.decode(outputs[0], skip_special_tokens=True)
    result = clean_generated_text(result)

    # Guarantee a minimum word count by appending the last missing words of
    # the original input. The whole deficit is closed in one append, so this
    # loop effectively runs at most one iteration.
    while len(result.split()) < len(text.split()):
        result += " " + " ".join(text.split()[-(len(text.split()) - len(result.split())):])

    return result
|
| 160 |
+
|
| 161 |
# Initialize session state: load the tokenizer/model once per browser session
# so Streamlit reruns reuse them instead of reloading on every interaction.
if 'model_loaded' not in st.session_state:
    # NOTE(review): from_pretrained may download/load t5-base — slow on first
    # run; st.cache_resource would share the model across sessions. Left as-is.
    st.session_state.tokenizer = T5Tokenizer.from_pretrained("t5-base")
    st.session_state.model = T5ForConditionalGeneration.from_pretrained("t5-base")
    st.session_state.model_loaded = True

# UI Components
# NOTE(review): st.set_page_config is documented to be the first Streamlit
# call in a script; here it runs after the session_state block above — verify
# this does not raise StreamlitAPIException at runtime.
st.set_page_config(page_title="Advanced Text Humanizer", page_icon="🤖")
|