EdysorEdutech commited on
Commit
e372523
·
verified ·
1 Parent(s): e2ec5b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -357
app.py CHANGED
@@ -25,14 +25,7 @@ except:
25
  class HumanLikeVariations:
26
  """Add human-like variations and intentional imperfections"""
27
 
28
- def __init__(self, contraction_prob=0.8, oxford_comma_prob=0.15, which_that_prob=0.08,
29
- typo_prob=0.02, natural_error_prob=0.05):
30
- self.contraction_prob = contraction_prob
31
- self.oxford_comma_prob = oxford_comma_prob
32
- self.which_that_prob = which_that_prob
33
- self.typo_prob = typo_prob
34
- self.natural_error_prob = natural_error_prob
35
-
36
  # Common human writing patterns - EXPANDED for Originality AI
37
  self.casual_transitions = [
38
  "So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
@@ -165,8 +158,8 @@ class HumanLikeVariations:
165
  # Always use contractions where natural
166
  sent = self.apply_contractions(sent)
167
 
168
- # Add VERY occasional natural errors (based on parameter)
169
- if random.random() < self.natural_error_prob and len(sent.split()) > 15:
170
  error_types = [
171
  # Missing comma in compound sentence
172
  lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
@@ -206,30 +199,30 @@ class HumanLikeVariations:
206
  }
207
 
208
  for full, contr in contractions.items():
209
- if random.random() < self.contraction_prob: # Use configurable probability
210
  text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
211
 
212
  return text
213
 
214
  def add_minor_errors(self, text):
215
  """Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
216
- # Occasionally miss Oxford comma (based on parameter)
217
- if random.random() < self.oxford_comma_prob:
218
  # Only in lists, not random commas
219
  text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
220
 
221
- # Sometimes use 'which' instead of 'that' (based on parameter)
222
- if random.random() < self.which_that_prob:
223
  # Only for non-restrictive clauses
224
  matches = re.finditer(r'\b(\w+) that (\w+)', text)
225
  for match in list(matches)[:1]: # Only first occurrence
226
  if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
227
  text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
228
 
229
- # NEW: Add very occasional typos (based on parameter) - REDUCED AND CONTROLLED
230
  sentences = text.split('. ')
231
  for i, sent in enumerate(sentences):
232
- if random.random() < self.typo_prob and len(sent.split()) > 15: # Only in longer sentences
233
  words = sent.split()
234
  # Pick a random word to potentially typo
235
  word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
@@ -274,7 +267,7 @@ class HumanLikeVariations:
274
 
275
  return text
276
 
277
- def add_natural_human_patterns(self, text, speech_prob=0.15, error_prob=0.10, combine_prob=0.2):
278
  """Add natural human writing patterns that Originality AI associates with human text"""
279
  sentences = self.split_into_sentences_advanced(text)
280
  result_sentences = []
@@ -286,8 +279,8 @@ class HumanLikeVariations:
286
  # Natural contractions throughout
287
  sentence = self.apply_contractions(sentence)
288
 
289
- # Add natural speech patterns (based on parameter)
290
- if random.random() < speech_prob and len(sentence.split()) > 10:
291
  # Natural interruptions that humans actually use
292
  if random.random() < 0.5:
293
  # Add "you know" or "I mean" naturally
@@ -304,8 +297,8 @@ class HumanLikeVariations:
304
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
305
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
306
 
307
- # Add subtle errors that humans make (based on parameter)
308
- if random.random() < error_prob:
309
  words = sentence.split()
310
  if len(words) > 5:
311
  # Common comma omissions
@@ -318,8 +311,8 @@ class HumanLikeVariations:
318
  words.insert(idx+1, words[idx])
319
  sentence = ' '.join(words)
320
 
321
- # Natural sentence combinations (based on parameter)
322
- if i < len(sentences) - 1 and random.random() < combine_prob:
323
  next_sent = sentences[i+1].strip()
324
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
325
  # Natural connectors based on content
@@ -551,8 +544,11 @@ class EnhancedDipperHumanizer:
551
  except:
552
  print("BART model not available")
553
  self.use_bart = False
 
 
 
554
 
555
- def add_natural_human_patterns(self, text, speech_prob=0.15, error_prob=0.10, combine_prob=0.2):
556
  """Add natural human writing patterns that Originality AI associates with human text"""
557
  sentences = self.split_into_sentences_advanced(text)
558
  result_sentences = []
@@ -564,8 +560,8 @@ class EnhancedDipperHumanizer:
564
  # Natural contractions throughout
565
  sentence = self.apply_contractions(sentence)
566
 
567
- # Add natural speech patterns (based on parameter)
568
- if random.random() < speech_prob and len(sentence.split()) > 10:
569
  # Natural interruptions that humans actually use
570
  if random.random() < 0.5:
571
  # Add "you know" or "I mean" naturally
@@ -582,8 +578,8 @@ class EnhancedDipperHumanizer:
582
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
583
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
584
 
585
- # Add subtle errors that humans make (based on parameter)
586
- if random.random() < error_prob:
587
  words = sentence.split()
588
  if len(words) > 5:
589
  # Common comma omissions
@@ -596,8 +592,8 @@ class EnhancedDipperHumanizer:
596
  words.insert(idx+1, words[idx])
597
  sentence = ' '.join(words)
598
 
599
- # Natural sentence combinations (based on parameter)
600
- if i < len(sentences) - 1 and random.random() < combine_prob:
601
  next_sent = sentences[i+1].strip()
602
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
603
  # Natural connectors based on content
@@ -864,9 +860,7 @@ class EnhancedDipperHumanizer:
864
 
865
  return text.strip()
866
 
867
- def paraphrase_with_dipper(self, text, lex_diversity=75, order_diversity=30,
868
- temperature=0.85, top_p=0.92, length_multiplier=1.4,
869
- no_repeat_ngram=4):
870
  """Paraphrase text using Dipper model with sentence-level processing"""
871
  if not text or len(text.strip()) < 3:
872
  return text
@@ -884,17 +878,16 @@ class EnhancedDipperHumanizer:
884
  continue
885
 
886
  try:
887
- # Apply diversity settings based on sentence length
888
  if len(sentence.split()) < 10:
889
- # Use slightly lower diversity for short sentences
890
- actual_lex = max(lex_diversity - 5, 50)
891
- actual_order = max(order_diversity - 5, 15)
892
  else:
893
- actual_lex = lex_diversity
894
- actual_order = order_diversity
895
 
896
- lex_code = int(100 - actual_lex)
897
- order_code = int(100 - actual_order)
898
 
899
  # Format input for Dipper
900
  if self.is_dipper:
@@ -920,7 +913,11 @@ class EnhancedDipperHumanizer:
920
 
921
  # Generate with appropriate variation
922
  original_length = len(sentence.split())
923
- max_new_length = int(original_length * length_multiplier)
 
 
 
 
924
 
925
  with torch.no_grad():
926
  outputs = self.model.generate(
@@ -928,9 +925,9 @@ class EnhancedDipperHumanizer:
928
  max_length=max_new_length + 20,
929
  min_length=max(5, int(original_length * 0.7)),
930
  do_sample=True,
931
- top_p=top_p,
932
- temperature=temperature,
933
- no_repeat_ngram_size=no_repeat_ngram,
934
  num_beams=1, # Greedy for more randomness
935
  early_stopping=True
936
  )
@@ -967,6 +964,9 @@ class EnhancedDipperHumanizer:
967
  # Join sentences back
968
  result = ' '.join(paraphrased_sentences)
969
 
 
 
 
970
  return result
971
 
972
  def fix_incomplete_sentence_smart(self, generated, original):
@@ -1055,7 +1055,7 @@ class EnhancedDipperHumanizer:
1055
  # Clean up sentences
1056
  return [s for s in sentences if s and len(s.strip()) > 0]
1057
 
1058
- def paraphrase_with_bart(self, text, bart_temperature=1.1, bart_top_p=0.9, bart_beams=2):
1059
  """Additional paraphrasing with BART for more variation"""
1060
  if not self.use_bart or not text or len(text.strip()) < 3:
1061
  return text
@@ -1091,10 +1091,10 @@ class EnhancedDipperHumanizer:
1091
  **inputs,
1092
  max_length=int(original_length * 1.4) + 10,
1093
  min_length=max(5, int(original_length * 0.6)),
1094
- num_beams=bart_beams,
1095
- temperature=bart_temperature,
1096
  do_sample=True,
1097
- top_p=bart_top_p,
1098
  early_stopping=True
1099
  )
1100
 
@@ -1116,8 +1116,7 @@ class EnhancedDipperHumanizer:
1116
  print(f"Error in BART paraphrasing: {str(e)}")
1117
  return text
1118
 
1119
- def apply_sentence_variation(self, text, long_sentence_threshold=20,
1120
- short_sentence_threshold=10):
1121
  """Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
1122
  sentences = self.split_into_sentences_advanced(text)
1123
  varied_sentences = []
@@ -1133,7 +1132,7 @@ class EnhancedDipperHumanizer:
1133
  current_length = len(words)
1134
 
1135
  # Natural sentence length variation
1136
- if last_sentence_length > long_sentence_threshold and current_length > long_sentence_threshold:
1137
  # Break up if two long sentences in a row
1138
  if ',' in sentence:
1139
  parts = sentence.split(',', 1)
@@ -1148,8 +1147,8 @@ class EnhancedDipperHumanizer:
1148
 
1149
  # Natural combinations for flow
1150
  if (i < len(sentences) - 1 and
1151
- current_length < short_sentence_threshold and
1152
- len(sentences[i+1].split()) < short_sentence_threshold):
1153
 
1154
  next_sent = sentences[i+1].strip()
1155
  # Only combine if it makes semantic sense
@@ -1365,8 +1364,7 @@ class EnhancedDipperHumanizer:
1365
 
1366
  return html_text
1367
 
1368
- def add_natural_flow_variations(self, text, stream_prob=0.08, correction_prob=0.07,
1369
- thinking_prob=0.10):
1370
  """Add more natural flow and rhythm variations for Originality AI"""
1371
  sentences = self.split_into_sentences_advanced(text)
1372
  enhanced_sentences = []
@@ -1375,8 +1373,8 @@ class EnhancedDipperHumanizer:
1375
  if not sentence.strip():
1376
  continue
1377
 
1378
- # Add stream-of-consciousness elements (based on parameter)
1379
- if random.random() < stream_prob and len(sentence.split()) > 10:
1380
  stream_elements = [
1381
  " - wait, let me back up - ",
1382
  " - actually, scratch that - ",
@@ -1390,8 +1388,8 @@ class EnhancedDipperHumanizer:
1390
  words.insert(pos, random.choice(stream_elements))
1391
  sentence = ' '.join(words)
1392
 
1393
- # Add human-like self-corrections (based on parameter)
1394
- if random.random() < correction_prob:
1395
  corrections = [
1396
  " - or rather, ",
1397
  " - well, actually, ",
@@ -1409,8 +1407,8 @@ class EnhancedDipperHumanizer:
1409
  words.insert(pos, correction)
1410
  sentence = ' '.join(words)
1411
 
1412
- # Add thinking-out-loud patterns (based on parameter)
1413
- if random.random() < thinking_prob and i > 0:
1414
  thinking_patterns = [
1415
  "Come to think of it, ",
1416
  "Actually, you know what? ",
@@ -1428,45 +1426,11 @@ class EnhancedDipperHumanizer:
1428
 
1429
  return ' '.join(enhanced_sentences)
1430
 
1431
- def process_html(self, html_content, progress_callback=None, **kwargs):
1432
- """Main processing function with progress callback and configurable parameters"""
1433
  if not html_content.strip():
1434
  return "Please provide HTML content."
1435
 
1436
- # Extract all parameters with defaults
1437
- lex_diversity = kwargs.get('lex_diversity', 75)
1438
- order_diversity = kwargs.get('order_diversity', 30)
1439
- temperature = kwargs.get('temperature', 0.85)
1440
- top_p = kwargs.get('top_p', 0.92)
1441
- length_multiplier = kwargs.get('length_multiplier', 1.4)
1442
- no_repeat_ngram = kwargs.get('no_repeat_ngram', 4)
1443
- bart_usage_prob = kwargs.get('bart_usage_prob', 0.3)
1444
- bart_temperature = kwargs.get('bart_temperature', 1.1)
1445
- bart_top_p = kwargs.get('bart_top_p', 0.9)
1446
- bart_beams = kwargs.get('bart_beams', 2)
1447
- contraction_prob = kwargs.get('contraction_prob', 0.8)
1448
- oxford_comma_prob = kwargs.get('oxford_comma_prob', 0.15)
1449
- which_that_prob = kwargs.get('which_that_prob', 0.08)
1450
- typo_prob = kwargs.get('typo_prob', 0.02)
1451
- natural_error_prob = kwargs.get('natural_error_prob', 0.05)
1452
- speech_pattern_prob = kwargs.get('speech_pattern_prob', 0.15)
1453
- subtle_error_prob = kwargs.get('subtle_error_prob', 0.10)
1454
- sentence_combine_prob = kwargs.get('sentence_combine_prob', 0.2)
1455
- stream_conscious_prob = kwargs.get('stream_conscious_prob', 0.08)
1456
- self_correction_prob = kwargs.get('self_correction_prob', 0.07)
1457
- thinking_loud_prob = kwargs.get('thinking_loud_prob', 0.10)
1458
- long_sentence_threshold = kwargs.get('long_sentence_threshold', 20)
1459
- short_sentence_threshold = kwargs.get('short_sentence_threshold', 10)
1460
-
1461
- # Initialize human variations with parameters
1462
- self.human_variations = HumanLikeVariations(
1463
- contraction_prob=contraction_prob,
1464
- oxford_comma_prob=oxford_comma_prob,
1465
- which_that_prob=which_that_prob,
1466
- typo_prob=typo_prob,
1467
- natural_error_prob=natural_error_prob
1468
- )
1469
-
1470
  # Store all script and style content to preserve it
1471
  script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
1472
  style_placeholder = "###STYLE_PLACEHOLDER_{}###"
@@ -1512,49 +1476,24 @@ class EnhancedDipperHumanizer:
1512
  if len(original_text.split()) < 3:
1513
  continue
1514
 
1515
- # First pass with Dipper using configured parameters
1516
  paraphrased_text = self.paraphrase_with_dipper(
1517
  original_text,
1518
- lex_diversity=lex_diversity,
1519
- order_diversity=order_diversity,
1520
- temperature=temperature,
1521
- top_p=top_p,
1522
- length_multiplier=length_multiplier,
1523
- no_repeat_ngram=no_repeat_ngram
1524
- )
1525
-
1526
- # Add natural human patterns with configured probabilities
1527
- paraphrased_text = self.add_natural_human_patterns(
1528
- paraphrased_text,
1529
- speech_prob=speech_pattern_prob,
1530
- error_prob=subtle_error_prob,
1531
- combine_prob=sentence_combine_prob
1532
  )
1533
 
1534
- # Second pass with BART for longer texts (based on configured probability)
1535
  if self.use_bart and len(paraphrased_text.split()) > 8:
1536
- if random.random() < bart_usage_prob:
1537
- paraphrased_text = self.paraphrase_with_bart(
1538
- paraphrased_text,
1539
- bart_temperature=bart_temperature,
1540
- bart_top_p=bart_top_p,
1541
- bart_beams=bart_beams
1542
- )
1543
 
1544
- # Apply sentence variation with configured thresholds
1545
- paraphrased_text = self.apply_sentence_variation(
1546
- paraphrased_text,
1547
- long_sentence_threshold=long_sentence_threshold,
1548
- short_sentence_threshold=short_sentence_threshold
1549
- )
1550
 
1551
- # Add natural flow variations with configured probabilities
1552
- paraphrased_text = self.add_natural_flow_variations(
1553
- paraphrased_text,
1554
- stream_prob=stream_conscious_prob,
1555
- correction_prob=self_correction_prob,
1556
- thinking_prob=thinking_loud_prob
1557
- )
1558
 
1559
  # Fix punctuation and formatting
1560
  paraphrased_text = self.fix_punctuation(paraphrased_text)
@@ -1719,24 +1658,8 @@ class EnhancedDipperHumanizer:
1719
  # Initialize the humanizer
1720
  humanizer = EnhancedDipperHumanizer()
1721
 
1722
- def humanize_html(html_input,
1723
- # Diversity Settings
1724
- lex_diversity=75, order_diversity=30,
1725
- # Generation Parameters
1726
- temperature=0.85, top_p=0.92, length_multiplier=1.4, no_repeat_ngram=4,
1727
- # BART Parameters
1728
- bart_usage_prob=0.3, bart_temperature=1.1, bart_top_p=0.9, bart_beams=2,
1729
- # Human Variation Parameters
1730
- contraction_prob=0.8, oxford_comma_prob=0.15, which_that_prob=0.08,
1731
- typo_prob=0.02, natural_error_prob=0.05,
1732
- # Human Pattern Frequencies
1733
- speech_pattern_prob=0.15, subtle_error_prob=0.10, sentence_combine_prob=0.2,
1734
- # Flow Variation Parameters
1735
- stream_conscious_prob=0.08, self_correction_prob=0.07, thinking_loud_prob=0.10,
1736
- # Sentence Variation Parameters
1737
- long_sentence_threshold=20, short_sentence_threshold=10,
1738
- progress=gr.Progress()):
1739
- """Gradio interface function with progress updates and all parameters"""
1740
  if not html_input:
1741
  return "Please provide HTML content to humanize."
1742
 
@@ -1748,33 +1671,10 @@ def humanize_html(html_input,
1748
  if total > 0:
1749
  progress(current / total, desc=f"Processing: {current}/{total} elements")
1750
 
1751
- # Pass all parameters to process_html
1752
  result = humanizer.process_html(
1753
  html_input,
1754
- progress_callback=progress_callback,
1755
- lex_diversity=lex_diversity,
1756
- order_diversity=order_diversity,
1757
- temperature=temperature,
1758
- top_p=top_p,
1759
- length_multiplier=length_multiplier,
1760
- no_repeat_ngram=no_repeat_ngram,
1761
- bart_usage_prob=bart_usage_prob,
1762
- bart_temperature=bart_temperature,
1763
- bart_top_p=bart_top_p,
1764
- bart_beams=bart_beams,
1765
- contraction_prob=contraction_prob,
1766
- oxford_comma_prob=oxford_comma_prob,
1767
- which_that_prob=which_that_prob,
1768
- typo_prob=typo_prob,
1769
- natural_error_prob=natural_error_prob,
1770
- speech_pattern_prob=speech_pattern_prob,
1771
- subtle_error_prob=subtle_error_prob,
1772
- sentence_combine_prob=sentence_combine_prob,
1773
- stream_conscious_prob=stream_conscious_prob,
1774
- self_correction_prob=self_correction_prob,
1775
- thinking_loud_prob=thinking_loud_prob,
1776
- long_sentence_threshold=long_sentence_threshold,
1777
- short_sentence_threshold=short_sentence_threshold
1778
  )
1779
 
1780
  processing_time = time.time() - start_time
@@ -1783,192 +1683,47 @@ def humanize_html(html_input,
1783
 
1784
  return result
1785
 
1786
- # Create Gradio interface with all parameter inputs
1787
- with gr.Blocks(title="Enhanced Dipper AI Humanizer - Fully Configurable") as iface:
1788
- gr.Markdown("""
1789
- # Enhanced Dipper AI Humanizer - Optimized for Originality AI
1790
-
1791
- Ultra-configurable humanizer with fine-grained control over all parameters.
1792
- Adjust settings to find the perfect balance between human score and content quality.
1793
- """)
1794
-
1795
- with gr.Row():
1796
- with gr.Column(scale=1):
1797
- html_input = gr.Textbox(
1798
- lines=10,
1799
- placeholder="Paste your HTML content here...",
1800
- label="HTML Input"
1801
- )
1802
-
1803
- process_btn = gr.Button("Process HTML", variant="primary")
1804
-
1805
- with gr.Column(scale=1):
1806
- html_output = gr.Textbox(
1807
- lines=10,
1808
- label="Humanized HTML Output"
1809
- )
1810
-
1811
- with gr.Tabs():
1812
- with gr.Tab("Diversity Settings"):
1813
- gr.Markdown("**Controls how much the text is varied from the original**")
1814
- lex_diversity = gr.Slider(0, 100, value=75, step=5,
1815
- label="Lexical Diversity",
1816
- info="Higher = more word variation (75 balanced, 90+ for max human score)")
1817
- order_diversity = gr.Slider(0, 100, value=30, step=5,
1818
- label="Order Diversity",
1819
- info="Higher = more word reordering (30 balanced, 40+ for max human score)")
1820
-
1821
- with gr.Tab("Generation Parameters"):
1822
- gr.Markdown("**Fine-tune the AI model's text generation behavior**")
1823
- temperature = gr.Slider(0.1, 2.0, value=0.85, step=0.05,
1824
- label="Temperature",
1825
- info="Higher = more randomness (0.85 balanced, 0.9+ for max human)")
1826
- top_p = gr.Slider(0.1, 1.0, value=0.92, step=0.02,
1827
- label="Top-p (nucleus sampling)",
1828
- info="Higher = wider token selection (0.92 balanced, 0.95 for max human)")
1829
- length_multiplier = gr.Slider(1.1, 2.0, value=1.4, step=0.1,
1830
- label="Length Multiplier",
1831
- info="How much longer/shorter output can be vs input")
1832
- no_repeat_ngram = gr.Slider(2, 6, value=4, step=1,
1833
- label="No Repeat N-gram Size",
1834
- info="Prevents repetition of N-word phrases (4 is balanced)")
1835
-
1836
- with gr.Tab("BART Parameters"):
1837
- gr.Markdown("**Settings for secondary BART paraphrasing model**")
1838
- bart_usage_prob = gr.Slider(0.0, 1.0, value=0.3, step=0.05,
1839
- label="BART Usage Probability",
1840
- info="Chance to use BART for additional variation")
1841
- bart_temperature = gr.Slider(0.7, 1.5, value=1.1, step=0.05,
1842
- label="BART Temperature",
1843
- info="Temperature for BART model")
1844
- bart_top_p = gr.Slider(0.8, 1.0, value=0.9, step=0.02,
1845
- label="BART Top-p",
1846
- info="Top-p for BART model")
1847
- bart_beams = gr.Slider(1, 4, value=2, step=1,
1848
- label="BART Beam Size",
1849
- info="Number of beams for BART generation")
1850
-
1851
- with gr.Tab("Human Variations"):
1852
- gr.Markdown("**Control natural human-like writing patterns**")
1853
- contraction_prob = gr.Slider(0.0, 1.0, value=0.8, step=0.05,
1854
- label="Contraction Probability",
1855
- info="Chance to use contractions (it's vs it is)")
1856
- oxford_comma_prob = gr.Slider(0.0, 0.5, value=0.15, step=0.05,
1857
- label="Oxford Comma Skip Probability",
1858
- info="Chance to skip Oxford comma (human-like error)")
1859
- which_that_prob = gr.Slider(0.0, 0.3, value=0.08, step=0.02,
1860
- label="Which/That Substitution",
1861
- info="Chance to use 'which' instead of 'that'")
1862
- typo_prob = gr.Slider(0.0, 0.1, value=0.02, step=0.01,
1863
- label="Typo Probability",
1864
- info="Chance of natural typos per sentence")
1865
- natural_error_prob = gr.Slider(0.0, 0.2, value=0.05, step=0.01,
1866
- label="Natural Error Probability",
1867
- info="Chance of human-like errors (missing commas, etc)")
1868
-
1869
- with gr.Tab("Human Pattern Frequencies"):
1870
- gr.Markdown("**Frequency of conversational elements**")
1871
- speech_pattern_prob = gr.Slider(0.0, 0.5, value=0.15, step=0.05,
1872
- label="Speech Pattern Probability",
1873
- info="Chance to add 'you know', 'I mean', etc.")
1874
- subtle_error_prob = gr.Slider(0.0, 0.3, value=0.10, step=0.05,
1875
- label="Subtle Error Probability",
1876
- info="Chance of subtle human errors")
1877
- sentence_combine_prob = gr.Slider(0.0, 0.5, value=0.2, step=0.05,
1878
- label="Sentence Combination Probability",
1879
- info="Chance to naturally combine short sentences")
1880
-
1881
- with gr.Tab("Flow Variations"):
1882
- gr.Markdown("**Advanced human-like flow patterns**")
1883
- stream_conscious_prob = gr.Slider(0.0, 0.3, value=0.08, step=0.02,
1884
- label="Stream of Consciousness",
1885
- info="Chance to add thinking interruptions")
1886
- self_correction_prob = gr.Slider(0.0, 0.2, value=0.07, step=0.02,
1887
- label="Self-Correction Probability",
1888
- info="Chance to add 'or rather', 'I mean' corrections")
1889
- thinking_loud_prob = gr.Slider(0.0, 0.3, value=0.10, step=0.02,
1890
- label="Thinking Out Loud",
1891
- info="Chance to add 'Come to think of it' patterns")
1892
-
1893
- with gr.Tab("Sentence Structure"):
1894
- gr.Markdown("**Control sentence length variation**")
1895
- long_sentence_threshold = gr.Slider(10, 40, value=20, step=2,
1896
- label="Long Sentence Threshold",
1897
- info="Words count to consider sentence 'long'")
1898
- short_sentence_threshold = gr.Slider(5, 15, value=10, step=1,
1899
- label="Short Sentence Threshold",
1900
- info="Words count to consider sentence 'short'")
1901
-
1902
- with gr.Accordion("Preset Configurations", open=False):
1903
- gr.Markdown("""
1904
- ### Quick Presets:
1905
- - **Balanced (Default)**: Current settings - good quality with high human score
1906
- - **Maximum Human**: Increase all diversity and variation parameters
1907
- - **Quality Focus**: Decrease variation parameters for cleaner output
1908
- - **Natural Flow**: Increase flow variations and speech patterns
1909
- """)
1910
-
1911
- preset_buttons = gr.Row()
1912
- with preset_buttons:
1913
- balanced_btn = gr.Button("Load Balanced", scale=1)
1914
- max_human_btn = gr.Button("Load Max Human", scale=1)
1915
- quality_btn = gr.Button("Load Quality Focus", scale=1)
1916
- natural_btn = gr.Button("Load Natural Flow", scale=1)
1917
-
1918
- # Define preset configurations
1919
- def load_balanced():
1920
- return [75, 30, 0.85, 0.92, 1.4, 4, 0.3, 1.1, 0.9, 2,
1921
- 0.8, 0.15, 0.08, 0.02, 0.05, 0.15, 0.10, 0.2,
1922
- 0.08, 0.07, 0.10, 20, 10]
1923
-
1924
- def load_max_human():
1925
- return [90, 40, 0.95, 0.95, 1.5, 4, 0.4, 1.2, 0.95, 2,
1926
- 0.9, 0.20, 0.12, 0.04, 0.08, 0.25, 0.15, 0.3,
1927
- 0.15, 0.10, 0.15, 20, 10]
1928
-
1929
- def load_quality():
1930
- return [65, 20, 0.75, 0.88, 1.3, 4, 0.2, 1.0, 0.85, 3,
1931
- 0.7, 0.10, 0.05, 0.01, 0.03, 0.08, 0.05, 0.15,
1932
- 0.03, 0.03, 0.05, 25, 8]
1933
-
1934
- def load_natural():
1935
- return [70, 25, 0.82, 0.90, 1.4, 4, 0.35, 1.1, 0.9, 2,
1936
- 0.85, 0.12, 0.06, 0.02, 0.04, 0.20, 0.12, 0.25,
1937
- 0.12, 0.10, 0.15, 18, 12]
1938
-
1939
- # All parameter components for preset updates
1940
- all_params = [
1941
- lex_diversity, order_diversity, temperature, top_p, length_multiplier, no_repeat_ngram,
1942
- bart_usage_prob, bart_temperature, bart_top_p, bart_beams,
1943
- contraction_prob, oxford_comma_prob, which_that_prob, typo_prob, natural_error_prob,
1944
- speech_pattern_prob, subtle_error_prob, sentence_combine_prob,
1945
- stream_conscious_prob, self_correction_prob, thinking_loud_prob,
1946
- long_sentence_threshold, short_sentence_threshold
1947
- ]
1948
 
1949
- # Connect preset buttons
1950
- balanced_btn.click(load_balanced, outputs=all_params)
1951
- max_human_btn.click(load_max_human, outputs=all_params)
1952
- quality_btn.click(load_quality, outputs=all_params)
1953
- natural_btn.click(load_natural, outputs=all_params)
 
 
 
1954
 
1955
- # Connect main process button
1956
- process_btn.click(
1957
- humanize_html,
1958
- inputs=[html_input] + all_params,
1959
- outputs=html_output
1960
- )
1961
 
1962
- # Add example
1963
- gr.Examples(
1964
- examples=[["""<article>
 
1965
  <h1>The Benefits of Regular Exercise</h1>
1966
  <div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
1967
  <p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
1968
  <p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
1969
- </article>"""]],
1970
- inputs=html_input
1971
- )
 
1972
 
1973
  if __name__ == "__main__":
1974
  # Enable queue for better handling of long-running processes
 
25
  class HumanLikeVariations:
26
  """Add human-like variations and intentional imperfections"""
27
 
28
+ def __init__(self):
 
 
 
 
 
 
 
29
  # Common human writing patterns - EXPANDED for Originality AI
30
  self.casual_transitions = [
31
  "So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
 
158
  # Always use contractions where natural
159
  sent = self.apply_contractions(sent)
160
 
161
+ # Add VERY occasional natural errors (5% chance)
162
+ if random.random() < 0.05 and len(sent.split()) > 15:
163
  error_types = [
164
  # Missing comma in compound sentence
165
  lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
 
199
  }
200
 
201
  for full, contr in contractions.items():
202
+ if random.random() < 0.8: # 80% chance to apply each contraction
203
  text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
204
 
205
  return text
206
 
207
  def add_minor_errors(self, text):
208
  """Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
209
+ # Occasionally miss Oxford comma (15% chance)
210
+ if random.random() < 0.15:
211
  # Only in lists, not random commas
212
  text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
213
 
214
+ # Sometimes use 'which' instead of 'that' (8% chance)
215
+ if random.random() < 0.08:
216
  # Only for non-restrictive clauses
217
  matches = re.finditer(r'\b(\w+) that (\w+)', text)
218
  for match in list(matches)[:1]: # Only first occurrence
219
  if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
220
  text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
221
 
222
+ # NEW: Add very occasional typos (2% chance per sentence) - REDUCED AND CONTROLLED
223
  sentences = text.split('. ')
224
  for i, sent in enumerate(sentences):
225
+ if random.random() < 0.02 and len(sent.split()) > 15: # Only in longer sentences
226
  words = sent.split()
227
  # Pick a random word to potentially typo
228
  word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
 
267
 
268
  return text
269
 
270
+ def add_natural_human_patterns(self, text):
271
  """Add natural human writing patterns that Originality AI associates with human text"""
272
  sentences = self.split_into_sentences_advanced(text)
273
  result_sentences = []
 
279
  # Natural contractions throughout
280
  sentence = self.apply_contractions(sentence)
281
 
282
+ # Add natural speech patterns (15% chance)
283
+ if random.random() < 0.15 and len(sentence.split()) > 10:
284
  # Natural interruptions that humans actually use
285
  if random.random() < 0.5:
286
  # Add "you know" or "I mean" naturally
 
297
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
298
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
299
 
300
+ # Add subtle errors that humans make (10% chance - reduced)
301
+ if random.random() < 0.10:
302
  words = sentence.split()
303
  if len(words) > 5:
304
  # Common comma omissions
 
311
  words.insert(idx+1, words[idx])
312
  sentence = ' '.join(words)
313
 
314
+ # Natural sentence combinations (20% chance)
315
+ if i < len(sentences) - 1 and random.random() < 0.2:
316
  next_sent = sentences[i+1].strip()
317
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
318
  # Natural connectors based on content
 
544
  except:
545
  print("BART model not available")
546
  self.use_bart = False
547
+
548
+ # Initialize human variations handler
549
+ self.human_variations = HumanLikeVariations()
550
 
551
+ def add_natural_human_patterns(self, text):
552
  """Add natural human writing patterns that Originality AI associates with human text"""
553
  sentences = self.split_into_sentences_advanced(text)
554
  result_sentences = []
 
560
  # Natural contractions throughout
561
  sentence = self.apply_contractions(sentence)
562
 
563
+ # Add natural speech patterns (15% chance - balanced)
564
+ if random.random() < 0.15 and len(sentence.split()) > 10:
565
  # Natural interruptions that humans actually use
566
  if random.random() < 0.5:
567
  # Add "you know" or "I mean" naturally
 
578
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
579
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
580
 
581
+ # Add subtle errors that humans make (8% chance)
582
+ if random.random() < 0.08:
583
  words = sentence.split()
584
  if len(words) > 5:
585
  # Common comma omissions
 
592
  words.insert(idx+1, words[idx])
593
  sentence = ' '.join(words)
594
 
595
+ # Natural sentence combinations (20% chance)
596
+ if i < len(sentences) - 1 and random.random() < 0.2:
597
  next_sent = sentences[i+1].strip()
598
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
599
  # Natural connectors based on content
 
860
 
861
  return text.strip()
862
 
863
+ def paraphrase_with_dipper(self, text, lex_diversity=60, order_diversity=20):
 
 
864
  """Paraphrase text using Dipper model with sentence-level processing"""
865
  if not text or len(text.strip()) < 3:
866
  return text
 
878
  continue
879
 
880
  try:
881
+ # BALANCED diversity for Originality AI (100% human with better quality)
882
  if len(sentence.split()) < 10:
883
+ lex_diversity = 70 # High but not extreme
884
+ order_diversity = 25
 
885
  else:
886
+ lex_diversity = 75 # Balanced diversity
887
+ order_diversity = 30 # Moderate order diversity
888
 
889
+ lex_code = int(100 - lex_diversity)
890
+ order_code = int(100 - order_diversity)
891
 
892
  # Format input for Dipper
893
  if self.is_dipper:
 
913
 
914
  # Generate with appropriate variation
915
  original_length = len(sentence.split())
916
+ max_new_length = int(original_length * 1.4)
917
+
918
+ # High variation parameters
919
+ temp = 0.85 # Slightly reduced from 0.9
920
+ top_p_val = 0.92 # Slightly reduced from 0.95
921
 
922
  with torch.no_grad():
923
  outputs = self.model.generate(
 
925
  max_length=max_new_length + 20,
926
  min_length=max(5, int(original_length * 0.7)),
927
  do_sample=True,
928
+ top_p=top_p_val,
929
+ temperature=temp,
930
+ no_repeat_ngram_size=4, # Allow more repetition for naturalness
931
  num_beams=1, # Greedy for more randomness
932
  early_stopping=True
933
  )
 
964
  # Join sentences back
965
  result = ' '.join(paraphrased_sentences)
966
 
967
+ # Apply natural human patterns
968
+ result = self.add_natural_human_patterns(result)
969
+
970
  return result
971
 
972
  def fix_incomplete_sentence_smart(self, generated, original):
 
1055
  # Clean up sentences
1056
  return [s for s in sentences if s and len(s.strip()) > 0]
1057
 
1058
+ def paraphrase_with_bart(self, text):
1059
  """Additional paraphrasing with BART for more variation"""
1060
  if not self.use_bart or not text or len(text.strip()) < 3:
1061
  return text
 
1091
  **inputs,
1092
  max_length=int(original_length * 1.4) + 10,
1093
  min_length=max(5, int(original_length * 0.6)),
1094
+ num_beams=2,
1095
+ temperature=1.1, # Higher temperature
1096
  do_sample=True,
1097
+ top_p=0.9,
1098
  early_stopping=True
1099
  )
1100
 
 
1116
  print(f"Error in BART paraphrasing: {str(e)}")
1117
  return text
1118
 
1119
+ def apply_sentence_variation(self, text):
 
1120
  """Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
1121
  sentences = self.split_into_sentences_advanced(text)
1122
  varied_sentences = []
 
1132
  current_length = len(words)
1133
 
1134
  # Natural sentence length variation
1135
+ if last_sentence_length > 20 and current_length > 20:
1136
  # Break up if two long sentences in a row
1137
  if ',' in sentence:
1138
  parts = sentence.split(',', 1)
 
1147
 
1148
  # Natural combinations for flow
1149
  if (i < len(sentences) - 1 and
1150
+ current_length < 10 and
1151
+ len(sentences[i+1].split()) < 10):
1152
 
1153
  next_sent = sentences[i+1].strip()
1154
  # Only combine if it makes semantic sense
 
1364
 
1365
  return html_text
1366
 
1367
+ def add_natural_flow_variations(self, text):
 
1368
  """Add more natural flow and rhythm variations for Originality AI"""
1369
  sentences = self.split_into_sentences_advanced(text)
1370
  enhanced_sentences = []
 
1373
  if not sentence.strip():
1374
  continue
1375
 
1376
+ # Add stream-of-consciousness elements (8% chance - reduced)
1377
+ if random.random() < 0.08 and len(sentence.split()) > 10:
1378
  stream_elements = [
1379
  " - wait, let me back up - ",
1380
  " - actually, scratch that - ",
 
1388
  words.insert(pos, random.choice(stream_elements))
1389
  sentence = ' '.join(words)
1390
 
1391
+ # Add human-like self-corrections (7% chance - reduced)
1392
+ if random.random() < 0.07:
1393
  corrections = [
1394
  " - or rather, ",
1395
  " - well, actually, ",
 
1407
  words.insert(pos, correction)
1408
  sentence = ' '.join(words)
1409
 
1410
+ # Add thinking-out-loud patterns (10% chance - reduced)
1411
+ if random.random() < 0.10 and i > 0:
1412
  thinking_patterns = [
1413
  "Come to think of it, ",
1414
  "Actually, you know what? ",
 
1426
 
1427
  return ' '.join(enhanced_sentences)
1428
 
1429
+ def process_html(self, html_content, progress_callback=None):
1430
+ """Main processing function with progress callback"""
1431
  if not html_content.strip():
1432
  return "Please provide HTML content."
1433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1434
  # Store all script and style content to preserve it
1435
  script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
1436
  style_placeholder = "###STYLE_PLACEHOLDER_{}###"
 
1476
  if len(original_text.split()) < 3:
1477
  continue
1478
 
1479
+ # First pass with Dipper
1480
  paraphrased_text = self.paraphrase_with_dipper(
1481
  original_text,
1482
+ lex_diversity=60,
1483
+ order_diversity=20
 
 
 
 
 
 
 
 
 
 
 
 
1484
  )
1485
 
1486
+ # Second pass with BART for longer texts (balanced probability)
1487
  if self.use_bart and len(paraphrased_text.split()) > 8:
1488
+ # 30% chance to use BART for more variation (balanced)
1489
+ if random.random() < 0.3:
1490
+ paraphrased_text = self.paraphrase_with_bart(paraphrased_text)
 
 
 
 
1491
 
1492
+ # Apply sentence variation
1493
+ paraphrased_text = self.apply_sentence_variation(paraphrased_text)
 
 
 
 
1494
 
1495
+ # Add natural flow variations
1496
+ paraphrased_text = self.add_natural_flow_variations(paraphrased_text)
 
 
 
 
 
1497
 
1498
  # Fix punctuation and formatting
1499
  paraphrased_text = self.fix_punctuation(paraphrased_text)
 
1658
  # Initialize the humanizer
1659
  humanizer = EnhancedDipperHumanizer()
1660
 
1661
+ def humanize_html(html_input, progress=gr.Progress()):
1662
+ """Gradio interface function with progress updates"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1663
  if not html_input:
1664
  return "Please provide HTML content to humanize."
1665
 
 
1671
  if total > 0:
1672
  progress(current / total, desc=f"Processing: {current}/{total} elements")
1673
 
1674
+ # Pass progress callback to process_html
1675
  result = humanizer.process_html(
1676
  html_input,
1677
+ progress_callback=progress_callback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1678
  )
1679
 
1680
  processing_time = time.time() - start_time
 
1683
 
1684
  return result
1685
 
1686
# Create Gradio interface with queue.
# NOTE(review): the description previously advertised "90% lexical, 40% order"
# diversity, but paraphrase_with_dipper actually runs balanced settings
# (defaults 60/20, per-sentence 70-75/25-30) -- the text below was updated to
# match the code so users aren't misled about the configuration.
iface = gr.Interface(
    fn=humanize_html,
    inputs=[
        gr.Textbox(
            lines=10,
            placeholder="Paste your HTML content here...",
            label="HTML Input"
        )
    ],
    outputs=gr.Textbox(
        lines=10,
        label="Humanized HTML Output"
    ),
    title="Enhanced Dipper AI Humanizer - Optimized for Originality AI",
    description="""
    Ultra-aggressive humanizer optimized to achieve 100% human scores on both Undetectable AI and Originality AI.

    Key Features:
    - Balanced diversity settings (60-75% lexical, 20-30% order) for natural variation
    - Enhanced human patterns: personal opinions, self-corrections, thinking-out-loud
    - Natural typos, contractions, and conversational flow
    - Stream-of-consciousness elements and rhetorical questions
    - Originality AI-specific optimizations: varied sentence starters, emphatic repetitions
    - Skips content in <strong>, <b>, and heading tags (including inside tables)
    - Designed to pass the strictest AI detection systems

    The tool creates genuinely human-like writing patterns that fool even the most sophisticated detectors!

    ⚠️ Note: Processing may take 5-10 minutes for large HTML documents.
    """,
    examples=[
        ["""<article>
<h1>The Benefits of Regular Exercise</h1>
<div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
<p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
<p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
</article>"""]
    ],
    theme="default"
)
1727
 
1728
  if __name__ == "__main__":
1729
  # Enable queue for better handling of long-running processes