"""
Combined Humanizer V2 - Adversarial Model + StealthWriter Post-Processor
Optimized for bypassing AI detectors using proven techniques.
"""

import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer
import re
import random
import os

# StealthWriter-style post-processor
class StealthPostProcessor:
    """Post-process text using StealthWriter's proven approach."""
    
    CONTRACTION_EXPANSIONS = {
        "it's": "it is", "It's": "It is", "don't": "do not", "Don't": "Do not",
        "doesn't": "does not", "Doesn't": "Does not", "didn't": "did not",
        "won't": "will not", "wouldn't": "would not", "couldn't": "could not",
        "shouldn't": "should not", "can't": "cannot", "Can't": "Cannot",
        "I'm": "I am", "I've": "I have", "I'll": "I will", "I'd": "I would",
        "you're": "you are", "You're": "You are", "you've": "you have",
        "we're": "we are", "We're": "We are", "we've": "we have",
        "they're": "they are", "They're": "They are", "they've": "they have",
        "that's": "that is", "That's": "That is", "there's": "there is",
        "what's": "what is", "who's": "who is", "let's": "let us",
        "isn't": "is not", "aren't": "are not", "wasn't": "was not",
        "weren't": "were not", "haven't": "have not", "hasn't": "has not",
        "hadn't": "had not", "here's": "here is", "he's": "he is",
        "she's": "she is", "we'll": "we will", "they'll": "they will",
        "gotta": "got to", "gonna": "going to", "wanna": "want to",
        "kinda": "kind of", "sorta": "sort of",
    }
    
    EMPHATIC_PHRASES = [", I tell you", ", I must say", ", mind you", ", you see", ", indeed"]
    
    FORMAL_STARTERS = [
        "It is almost a given that ", "One must acknowledge that ",
        "It goes without saying that ", "It is worth noting that ",
        "As it happens, ", "As a matter of fact, ", "In point of fact, ",
    ]
    
    SYNONYM_REPLACEMENTS = {
        "furry friend": "hairy companion", "pet": "animal companion",
        "dog": "canine", "cat": "feline", "help": "assist", "use": "utilize",
        "get": "obtain", "make": "create", "good": "favorable", "bad": "unfavorable",
        "big": "substantial", "small": "modest", "very": "quite", "really": "truly",
        "important": "significant", "need": "require", "want": "desire",
        "think": "believe", "know": "understand", "see": "observe",
        "find": "discover", "show": "demonstrate", "give": "provide",
        "start": "commence", "begin": "initiate", "end": "conclude",
        "try": "attempt", "keep": "maintain", "lot of": "numerous",
        "a lot": "considerably", "lots of": "a great many",
    }
    
    FILLERS_TO_REMOVE = [
        "like, ", ", like,", " like ", "you know, ", ", you know,",
        "basically, ", ", basically,", "honestly, ", "Honestly, ",
        "I mean, ", ", I mean,", "pretty much ", "kind of ", "sort of ",
        "actually, ", ", actually,", "literally ", "just ", "really ",
        "so, ", "So, ", "well, ", "Well, ", "anyway, ", "Anyway, ",
        "right? ", "Right? ", "you know? ", "I guess ", "I gotta say, ",
    ]
    
    def __init__(self, intensity="high"):
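        # Map the requested intensity to the probability that each optional rewrite rule is applied.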
        self.change_probability = {"low": 0.3, "medium": 0.5, "high": 0.7}.get(intensity, 0.7)
    
    def expand_contractions(self, text):
        for contraction, expansion in self.CONTRACTION_EXPANSIONS.items():
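            # Word-boundary anchors ensure contractions are only expanded as whole words.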
            pattern = re.compile(r'\b' + re.escape(contraction) + r'\b')
            text = pattern.sub(expansion, text)
        return text
    
    def remove_casual_fillers(self, text):
        for filler in self.FILLERS_TO_REMOVE:
            replacement = " " if filler.startswith(" ") or filler.endswith(" ") else ""
            # Anchor word-initial fillers so "just " is not stripped out of "adjust ", or "so, " out of "also, ".
            pattern = r'\b' + re.escape(filler) if filler[0].isalpha() else re.escape(filler)
            text = re.sub(pattern, replacement, text)
        return re.sub(r'\s+', ' ', text).strip()
    
    def apply_synonym_replacements(self, text):
        for common, formal in self.SYNONYM_REPLACEMENTS.items():
            if random.random() < self.change_probability:
                pattern = re.compile(r'\b' + re.escape(common) + r'\b', re.IGNORECASE)
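                # Preserve the original word's capitalization (ALL CAPS / Capitalized / lowercase) when substituting.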
                def replace_preserve_case(match):
                    word = match.group(0)
                    if word.isupper(): return formal.upper()
                    elif word[0].isupper(): return formal.capitalize()
                    return formal
                text = pattern.sub(replace_preserve_case, text)
        return text
    
    def add_emphatic_phrases(self, text):
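        # Occasionally tack an emphatic aside onto a sentence, just before its final period.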
        sentences = re.split(r'(?<=[.!])\s+', text)
        result = []
        for sentence in sentences:
            # Only add emphatic phrase if sentence doesn't already have one
            has_emphatic = any(phrase.strip(", ") in sentence for phrase in self.EMPHATIC_PHRASES)
            if sentence.endswith('.') and not has_emphatic and random.random() < self.change_probability * 0.25:
                phrase = random.choice(self.EMPHATIC_PHRASES)
                sentence = sentence[:-1] + phrase + "."
            result.append(sentence)
        return ' '.join(result)
    
    def add_formal_starters(self, text):
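        # Occasionally prefix interior sentences (not the first or last) with a formal opener.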
        sentences = re.split(r'(?<=[.!?])\s+', text)
        result = []
        for i, sentence in enumerate(sentences):
            # Only add formal starter if sentence doesn't already have one
            has_starter = any(starter.strip() in sentence for starter in self.FORMAL_STARTERS)
            if 0 < i < len(sentences) - 1 and not has_starter and random.random() < self.change_probability * 0.2:
                starter = random.choice(self.FORMAL_STARTERS)
                if sentence and sentence[0].isupper():
                    sentence = starter + sentence[0].lower() + sentence[1:]
                else:
                    sentence = starter + sentence
            result.append(sentence)
        return ' '.join(result)
    
    def process(self, text):
        text = self.expand_contractions(text)
        text = self.remove_casual_fillers(text)
        text = self.apply_synonym_replacements(text)
        text = self.add_emphatic_phrases(text)
        text = self.add_formal_starters(text)
        return re.sub(r'\s+', ' ', text).strip()
    
    def multi_pass_process(self, text, passes=2):
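        # Each pass re-runs the full pipeline; probabilistic rules skipped in one pass may fire in the next.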
        for _ in range(passes):
            text = self.process(text)
        return text


# Load model and tokenizer from HuggingFace Hub
print("Loading humanizer V3 model from HuggingFace Hub...")
MODEL_PATH = "harryroger798/humanizer-model-v3"
tokenizer = T5Tokenizer.from_pretrained(MODEL_PATH)
model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH)
print("Model loaded!")

# Initialize post-processor
processor = StealthPostProcessor(intensity="high")


def humanize_text(text, use_post_processor=True, post_processor_passes=2):
    """Combined humanizer: StealthWriter post-processor (primary) + model paraphrasing"""
    if not text.strip():
        return "", ""
    
    # Step 1: Run through model with better generation parameters
    inputs = tokenizer(f"humanize: {text}", return_tensors="pt", max_length=512, truncation=True)
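    # Beam sampling (beams + temperature/top-p) with a strong repetition penalty and n-gram blocking to limit degenerate loops.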
    outputs = model.generate(
        **inputs,
        max_length=512,
        num_beams=4,
        early_stopping=True,
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
        repetition_penalty=2.5,
        no_repeat_ngram_size=3,
        length_penalty=1.0
    )
    model_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Check for repetitive output - if detected, use original text with post-processor only
    words = model_output.split()
    if len(words) > 10:
        # Check for excessive repetition
        word_counts = {}
        for word in words:
            word_counts[word] = word_counts.get(word, 0) + 1
        max_repeat = max(word_counts.values()) if word_counts else 0
        if max_repeat > len(words) * 0.3:  # If any word appears more than 30% of the time
            # Fall back to using original text with post-processor
            model_output = text
    
    # Step 2: Apply StealthWriter post-processor (this is the key to bypassing detection)
    if use_post_processor:
        final_output = processor.multi_pass_process(model_output, passes=post_processor_passes)
    else:
        final_output = model_output
    
    return model_output, final_output


def gradio_humanize(text, use_post_processor, passes):
    """Gradio interface function"""
    model_out, final_out = humanize_text(text, use_post_processor, int(passes))
    return model_out, final_out


# Create Gradio interface
with gr.Blocks(title="Humanizer V2 - AI Detector Bypass") as demo:
    gr.Markdown("""
    # 🔄 Humanizer V2 - AI Detector Bypass
    
    **Combined approach:** Fine-tuned T5 model (39,776 samples) + StealthWriter-style post-processor
    
    This humanizer uses techniques proven to bypass AI detectors:
    - Trained on 39,776 humanizer samples (combined dataset)
    - StealthWriter-style post-processing (expands contractions, uses formal expressions)
    - Multi-pass processing for better results
    - Achieved 0% AI detection on StealthWriter in testing
    """)
    
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text (AI-generated)",
                placeholder="Paste your AI-generated text here...",
                lines=8
            )
            
            with gr.Row():
                use_post_processor = gr.Checkbox(label="Use StealthWriter Post-Processor", value=True)
                passes = gr.Slider(minimum=1, maximum=3, value=2, step=1, label="Post-Processor Passes")
            
            submit_btn = gr.Button("Humanize", variant="primary")
        
        with gr.Column():
            model_output = gr.Textbox(label="Model Output (before post-processing)", lines=6)
            final_output = gr.Textbox(label="Final Output (after post-processing)", lines=6)
    
    submit_btn.click(
        fn=gradio_humanize,
        inputs=[input_text, use_post_processor, passes],
        outputs=[model_output, final_output]
    )
    
    gr.Markdown("""
    ---
    **Tips for best results:**
    - Enable the StealthWriter post-processor for better bypass rates
    - Use 2-3 passes for optimal results
    - Test the output on StealthWriter or other AI detectors
    """)


if __name__ == "__main__":
    demo.launch()