"""Streamlit app: rewrite text in a more natural, human-like tone with a public T5 model."""

import streamlit as st
from transformers import AutoTokenizer, T5ForConditionalGeneration
import torch

# --- Streamlit Page Configuration ---
st.set_page_config(layout="wide", page_title="Free Text Humanizer")

# --- Custom CSS for Enhanced Aesthetics ---
# Placeholder for app-wide CSS ('Inter' font, rounded corners, pink/purple
# scheme).  NOTE(review): the style block below is currently empty — the
# intended CSS was never added; the markdown call is a harmless no-op.
st.markdown(
    """
    """,
    unsafe_allow_html=True
)

st.title("Free Text Humanizer (Public Models)")
st.markdown("Rewrite text for a more natural, human-like tone using publicly available models.")


@st.cache_resource
def load_model_and_tokenizer(model_name: str):
    """Load a public T5 model + tokenizer and move the model to GPU if available.

    Returns (tokenizer, model, device).  Exceptions are deliberately allowed to
    propagate: ``st.cache_resource`` does not cache a run that raises, so a
    transient failure (network hiccup, typo'd model name) is retried on the
    next rerun instead of being cached forever.  User-facing success/error
    messages are emitted by the caller, not here — Streamlit elements created
    inside a cached function are replayed on every rerun and trigger warnings.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    return tokenizer, model, device


# --- Humanizer Model Configuration ---
HUMANIZER_MODEL_NAME = "t5-base"  # Public checkpoint; no authentication needed
HUMANIZER_MODEL_LABEL = "T5-base (for Humanizer)"

# --- Load Humanizer Model ---
# Maps label -> (tokenizer, model, device); empty means the load failed.
HUMANIZER_MODEL_LOADED = {}
with st.spinner(f"Loading humanizer model ({HUMANIZER_MODEL_LABEL})... \nThis may take a moment."):
    try:
        tokenizer_humanizer, model_humanizer, device_humanizer = load_model_and_tokenizer(HUMANIZER_MODEL_NAME)
        st.success(f"Successfully loaded model: {HUMANIZER_MODEL_NAME} on {device_humanizer}")
        HUMANIZER_MODEL_LOADED[HUMANIZER_MODEL_LABEL] = (tokenizer_humanizer, model_humanizer, device_humanizer)
    except Exception as e:
        st.error(f"Error loading model '{HUMANIZER_MODEL_NAME}'. Please check the model name, ensure it's public, and verify your internet connection. Error: {e}")

# --- Check if Humanizer Model Loaded Successfully ---
if not HUMANIZER_MODEL_LOADED:
    st.error("Humanizer model could not be loaded. Please check your internet connection or the model name.")
    st.stop()  # Stop the app if the essential model isn't available

# --- Text Input for Humanizer ---
st.header("1. Text Humanizer")
humanizer_text = st.text_area(
    "Paste text here to make it sound more natural:",
    height=300,
    placeholder="Type or paste text here to rewrite it with a more human-like tone.",
    key="humanizer_text_area"  # Unique key for this Streamlit widget
)

# --- Debugging Information for Input Text ---
st.info(f"Humanizer input length: {len(humanizer_text)} characters.")
if humanizer_text.strip():
    st.info(f"First 50 characters of humanizer input: '{humanizer_text.strip()[:50]}...'")

# --- Humanize Button Logic ---
if st.button("Humanize Text", key="humanize_button"):
    st.write("Humanize button clicked! \nStarting rewriting process...")  # Debugging feedback
    if not humanizer_text.strip():
        st.warning("Please enter some text to humanize.")
    else:
        st.subheader("Humanized Text:")
        # Retrieve the loaded model components
        tokenizer_h, model_h, device_h = HUMANIZER_MODEL_LOADED[HUMANIZER_MODEL_LABEL]
        with st.spinner("Rewriting text..."):
            try:
                # T5 models are conditioned on a task prefix.
                # NOTE(review): vanilla t5-base was NOT trained with a
                # "paraphrase:" prefix (its known prefixes are e.g.
                # "summarize:", "translate English to German:"), so it may
                # echo the input unchanged — the identical-output warning
                # below exists for exactly that case.  A paraphrase-finetuned
                # public checkpoint would behave better; confirm before
                # swapping.
                input_text_for_t5 = "paraphrase: " + humanizer_text
                st.info(f"Input to T5 model: '{input_text_for_t5[:100]}...'")  # Debugging: show T5 input

                # Tokenize the input text and move it to the correct device (CPU/GPU)
                inputs = tokenizer_h(input_text_for_t5, return_tensors="pt", max_length=512, truncation=True)
                inputs = {k: v.to(device_h) for k, v in inputs.items()}
                st.info(f"Tokenized input length: {inputs['input_ids'].shape[1]}")  # Debugging: token length

                # Generate the rewritten text.  inference_mode() disables
                # autograd bookkeeping — generation never needs gradients,
                # and this keeps memory use down on both CPU and GPU.
                # num_beams: beam-search width (higher = better quality, slower)
                # early_stopping: stop when all beam hypotheses have finished
                # no_repeat_ngram_size: reduce repetitive phrases
                with torch.inference_mode():
                    outputs = model_h.generate(
                        inputs["input_ids"],
                        attention_mask=inputs["attention_mask"],
                        max_length=256,
                        num_beams=8,
                        early_stopping=True,
                        no_repeat_ngram_size=2
                    )

                # Decode the generated token IDs back into human-readable text
                humanized_output = tokenizer_h.decode(outputs[0], skip_special_tokens=True)
                st.info(f"Generated output length: {len(humanized_output)} characters.")  # Debugging: output length

                st.markdown("---")
                st.markdown("#### Original Text:")
                st.write(humanizer_text)  # Display the original text
                st.markdown("#### Rewritten (Humanized) Text:")
                st.success(humanized_output)  # Display the humanized text with a success style

                # --- Feedback for suspicious outputs ---
                if humanized_output.strip() == humanizer_text.strip():
                    st.warning("⚠️ The humanizer produced output identical to the input. This can happen with very short or simple texts, or if the model finds no significant changes to make.")
                elif len(humanized_output.strip()) < len(humanizer_text.strip()) * 0.5:  # Arbitrary check for very short output
                    st.warning("⚠️ The humanized output is significantly shorter than the original. The model might have truncated or simplified the text too much.")

                st.markdown(
                    """
                    **Disclaimer:** This "Humanizer" uses a paraphrasing model to rewrite text for a more natural tone.
                    It **does not guarantee** that the output will bypass AI detection tools.
                    The effectiveness varies based on the input text and the complexity of the desired "human" style.
                    """
                )
            except Exception as e:
                st.error(f"Failed to humanize text: {e}")
                st.exception(e)  # Display full traceback for detailed debugging

st.markdown("---")
st.markdown("### How the Humanizer Works:")
st.markdown(
    """
    The "Text Humanizer" uses a **T5-base** model, a powerful text-to-text transformer.
    When you provide text, the model paraphrases it, aiming to:

    * Vary sentence structures and vocabulary.
    * Introduce more natural phrasing.
    * Potentially make the text less predictable to language models.

    **Important Note:** This tool is designed to *rewrite* text for improved naturalness.
    It **does not guarantee** that the output will bypass AI detection.
    The effectiveness of "humanization" is subjective and depends on the original text's
    characteristics and the specific AI detector being used.
    """
)
st.markdown("---")
st.markdown("Made with ❤️ using Streamlit and Hugging Face Transformers.")