"""
Combined Humanizer V2 - Adversarial Model + StealthWriter Post-Processor
Optimized for bypassing AI detectors using proven techniques.
"""

import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer
import re
import random
import os

# StealthWriter-style post-processor
class StealthPostProcessor:
    """Post-process text using StealthWriter's proven approach."""
    
    CONTRACTION_EXPANSIONS = {
        "it's": "it is", "It's": "It is", "don't": "do not", "Don't": "Do not",
        "doesn't": "does not", "Doesn't": "Does not", "didn't": "did not",
        "won't": "will not", "wouldn't": "would not", "couldn't": "could not",
        "shouldn't": "should not", "can't": "cannot", "Can't": "Cannot",
        "I'm": "I am", "I've": "I have", "I'll": "I will", "I'd": "I would",
        "you're": "you are", "You're": "You are", "you've": "you have",
        "we're": "we are", "We're": "We are", "we've": "we have",
        "they're": "they are", "They're": "They are", "they've": "they have",
        "that's": "that is", "That's": "That is", "there's": "there is",
        "what's": "what is", "who's": "who is", "let's": "let us",
        "isn't": "is not", "aren't": "are not", "wasn't": "was not",
        "weren't": "were not", "haven't": "have not", "hasn't": "has not",
        "hadn't": "had not", "here's": "here is", "he's": "he is",
        "she's": "she is", "we'll": "we will", "they'll": "they will",
        "gotta": "got to", "gonna": "going to", "wanna": "want to",
        "kinda": "kind of", "sorta": "sort of",
    }
    
    EMPHATIC_PHRASES = [", I tell you", ", I must say", ", mind you", ", you see", ", indeed"]
    
    FORMAL_STARTERS = [
        "It is almost a given that ", "One must acknowledge that ",
        "It goes without saying that ", "It is worth noting that ",
        "As it happens, ", "As a matter of fact, ", "In point of fact, ",
    ]
    
    SYNONYM_REPLACEMENTS = {
        "furry friend": "hairy companion", "pet": "animal companion",
        "dog": "canine", "cat": "feline", "help": "assist", "use": "utilize",
        "get": "obtain", "make": "create", "good": "favorable", "bad": "unfavorable",
        "big": "substantial", "small": "modest", "very": "quite", "really": "truly",
        "important": "significant", "need": "require", "want": "desire",
        "think": "believe", "know": "understand", "see": "observe",
        "find": "discover", "show": "demonstrate", "give": "provide",
        "start": "commence", "begin": "initiate", "end": "conclude",
        "try": "attempt", "keep": "maintain", "lot of": "numerous",
        "a lot": "considerably", "lots of": "a great many",
    }
    
    FILLERS_TO_REMOVE = [
        "like, ", ", like,", " like ", "you know, ", ", you know,",
        "basically, ", ", basically,", "honestly, ", "Honestly, ",
        "I mean, ", ", I mean,", "pretty much ", "kind of ", "sort of ",
        "actually, ", ", actually,", "literally ", "just ", "really ",
        "so, ", "So, ", "well, ", "Well, ", "anyway, ", "Anyway, ",
        "right? ", "Right? ", "you know? ", "I guess ", "I gotta say, ",
    ]
    
    def __init__(self, intensity="high"):
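        # Map the requested intensity to the probability that each optional rewrite rule is applied.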
        self.change_probability = {"low": 0.3, "medium": 0.5, "high": 0.7}.get(intensity, 0.7)
    
    def expand_contractions(self, text):
        for contraction, expansion in self.CONTRACTION_EXPANSIONS.items():
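            # Word-boundary anchors ensure contractions are only expanded as whole words.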
            pattern = re.compile(r'\b' + re.escape(contraction) + r'\b')
            text = pattern.sub(expansion, text)
        return text
    
    def remove_casual_fillers(self, text):
        for filler in self.FILLERS_TO_REMOVE:
            replacement = " " if filler.startswith(" ") or filler.endswith(" ") else ""
            # Anchor word-initial fillers so "just " is not stripped out of "adjust ", or "so, " out of "also, ".
            pattern = r'\b' + re.escape(filler) if filler[0].isalpha() else re.escape(filler)
            text = re.sub(pattern, replacement, text)
        return re.sub(r'\s+', ' ', text).strip()
    
    def apply_synonym_replacements(self, text):
        for common, formal in self.SYNONYM_REPLACEMENTS.items():
            if random.random() < self.change_probability:
                pattern = re.compile(r'\b' + re.escape(common) + r'\b', re.IGNORECASE)
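                # Preserve the original word's capitalization (ALL CAPS / Capitalized / lowercase) when substituting.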
                def replace_preserve_case(match):
                    word = match.group(0)
                    if word.isupper(): return formal.upper()
                    elif word[0].isupper(): return formal.capitalize()
                    return formal
                text = pattern.sub(replace_preserve_case, text)
        return text
    
    def add_emphatic_phrases(self, text):
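        # Occasionally tack an emphatic aside onto a sentence, just before its final period.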
        sentences = re.split(r'(?<=[.!])\s+', text)
        result = []
        for sentence in sentences:
            # Only add emphatic phrase if sentence doesn't already have one
            has_emphatic = any(phrase.strip(", ") in sentence for phrase in self.EMPHATIC_PHRASES)
            if sentence.endswith('.') and not has_emphatic and random.random() < self.change_probability * 0.25:
                phrase = random.choice(self.EMPHATIC_PHRASES)
                sentence = sentence[:-1] + phrase + "."
            result.append(sentence)
        return ' '.join(result)
    
    def add_formal_starters(self, text):
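        # Occasionally prefix interior sentences (not the first or last) with a formal opener.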
        sentences = re.split(r'(?<=[.!?])\s+', text)
        result = []
        for i, sentence in enumerate(sentences):
            # Only add formal starter if sentence doesn't already have one
            has_starter = any(starter.strip() in sentence for starter in self.FORMAL_STARTERS)
            if 0 < i < len(sentences) - 1 and not has_starter and random.random() < self.change_probability * 0.2:
                starter = random.choice(self.FORMAL_STARTERS)
                if sentence and sentence[0].isupper():
                    sentence = starter + sentence[0].lower() + sentence[1:]
                else:
                    sentence = starter + sentence
            result.append(sentence)
        return ' '.join(result)
    
    def process(self, text):
        text = self.expand_contractions(text)
        text = self.remove_casual_fillers(text)
        text = self.apply_synonym_replacements(text)
        text = self.add_emphatic_phrases(text)
        text = self.add_formal_starters(text)
        return re.sub(r'\s+', ' ', text).strip()
    
    def multi_pass_process(self, text, passes=2):
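        # Each pass re-runs the full pipeline; probabilistic rules skipped in one pass may fire in the next.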
        for _ in range(passes):
            text = self.process(text)
        return text


# Load model and tokenizer from HuggingFace Hub
print("Loading humanizer V3 model from HuggingFace Hub...")
MODEL_PATH = "harryroger798/humanizer-model-v3"
tokenizer = T5Tokenizer.from_pretrained(MODEL_PATH)
model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH)
print("Model loaded!")

# Initialize post-processor
processor = StealthPostProcessor(intensity="high")


def humanize_text(text, use_post_processor=True, post_processor_passes=2):
    """Combined humanizer: StealthWriter post-processor (primary) + model paraphrasing"""
    if not text.strip():
        return "", ""
    
    # Step 1: Run through model with better generation parameters
    inputs = tokenizer(f"humanize: {text}", return_tensors="pt", max_length=512, truncation=True)
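    # Beam sampling (beams + temperature/top-p) with a strong repetition penalty and n-gram blocking to limit degenerate loops.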
    outputs = model.generate(
        **inputs,
        max_length=512,
        num_beams=4,
        early_stopping=True,
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
        repetition_penalty=2.5,
        no_repeat_ngram_size=3,
        length_penalty=1.0
    )
    model_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Check for repetitive output - if detected, use original text with post-processor only
    words = model_output.split()
    if len(words) > 10:
        # Check for excessive repetition
        word_counts = {}
        for word in words:
            word_counts[word] = word_counts.get(word, 0) + 1
        max_repeat = max(word_counts.values()) if word_counts else 0
        if max_repeat > len(words) * 0.3:  # If any word appears more than 30% of the time
            # Fall back to using original text with post-processor
            model_output = text
    
    # Step 2: Apply StealthWriter post-processor (this is the key to bypassing detection)
    if use_post_processor:
        final_output = processor.multi_pass_process(model_output, passes=post_processor_passes)
    else:
        final_output = model_output
    
    return model_output, final_output


def gradio_humanize(text, use_post_processor, passes):
    """Gradio interface function"""
    model_out, final_out = humanize_text(text, use_post_processor, int(passes))
    return model_out, final_out


# Create Gradio interface
with gr.Blocks(title="Humanizer V2 - AI Detector Bypass") as demo:
    gr.Markdown("""
    # 🔄 Humanizer V2 - AI Detector Bypass
    
    **Combined approach:** Fine-tuned T5 model (39,776 samples) + StealthWriter-style post-processor
    
    This humanizer uses techniques proven to bypass AI detectors:
    - Trained on 39,776 humanizer samples (combined dataset)
    - StealthWriter-style post-processing (expands contractions, uses formal expressions)
    - Multi-pass processing for better results
    - Achieved 0% AI detection on StealthWriter in testing
    """)
    
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text (AI-generated)",
                placeholder="Paste your AI-generated text here...",
                lines=8
            )
            
            with gr.Row():
                use_post_processor = gr.Checkbox(label="Use StealthWriter Post-Processor", value=True)
                passes = gr.Slider(minimum=1, maximum=3, value=2, step=1, label="Post-Processor Passes")
            
            submit_btn = gr.Button("Humanize", variant="primary")
        
        with gr.Column():
            model_output = gr.Textbox(label="Model Output (before post-processing)", lines=6)
            final_output = gr.Textbox(label="Final Output (after post-processing)", lines=6)
    
    submit_btn.click(
        fn=gradio_humanize,
        inputs=[input_text, use_post_processor, passes],
        outputs=[model_output, final_output]
    )
    
    gr.Markdown("""
    ---
    **Tips for best results:**
    - Enable the StealthWriter post-processor for better bypass rates
    - Use 2-3 passes for optimal results
    - Test the output on StealthWriter or other AI detectors
    """)


if __name__ == "__main__":
    demo.launch()