EdysorEdutech commited on
Commit
e2ec5b0
·
verified ·
1 Parent(s): 08270d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +357 -112
app.py CHANGED
@@ -25,7 +25,14 @@ except:
25
  class HumanLikeVariations:
26
  """Add human-like variations and intentional imperfections"""
27
 
28
- def __init__(self):
 
 
 
 
 
 
 
29
  # Common human writing patterns - EXPANDED for Originality AI
30
  self.casual_transitions = [
31
  "So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
@@ -158,8 +165,8 @@ class HumanLikeVariations:
158
  # Always use contractions where natural
159
  sent = self.apply_contractions(sent)
160
 
161
- # Add VERY occasional natural errors (5% chance)
162
- if random.random() < 0.05 and len(sent.split()) > 15:
163
  error_types = [
164
  # Missing comma in compound sentence
165
  lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
@@ -199,30 +206,30 @@ class HumanLikeVariations:
199
  }
200
 
201
  for full, contr in contractions.items():
202
- if random.random() < 0.8: # 80% chance to apply each contraction
203
  text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
204
 
205
  return text
206
 
207
  def add_minor_errors(self, text):
208
  """Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
209
- # Occasionally miss Oxford comma (15% chance)
210
- if random.random() < 0.15:
211
  # Only in lists, not random commas
212
  text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
213
 
214
- # Sometimes use 'which' instead of 'that' (8% chance)
215
- if random.random() < 0.08:
216
  # Only for non-restrictive clauses
217
  matches = re.finditer(r'\b(\w+) that (\w+)', text)
218
  for match in list(matches)[:1]: # Only first occurrence
219
  if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
220
  text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
221
 
222
- # NEW: Add very occasional typos (2% chance per sentence) - REDUCED AND CONTROLLED
223
  sentences = text.split('. ')
224
  for i, sent in enumerate(sentences):
225
- if random.random() < 0.02 and len(sent.split()) > 15: # Only in longer sentences
226
  words = sent.split()
227
  # Pick a random word to potentially typo
228
  word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
@@ -267,7 +274,7 @@ class HumanLikeVariations:
267
 
268
  return text
269
 
270
- def add_natural_human_patterns(self, text):
271
  """Add natural human writing patterns that Originality AI associates with human text"""
272
  sentences = self.split_into_sentences_advanced(text)
273
  result_sentences = []
@@ -279,8 +286,8 @@ class HumanLikeVariations:
279
  # Natural contractions throughout
280
  sentence = self.apply_contractions(sentence)
281
 
282
- # Add natural speech patterns (15% chance)
283
- if random.random() < 0.15 and len(sentence.split()) > 10:
284
  # Natural interruptions that humans actually use
285
  if random.random() < 0.5:
286
  # Add "you know" or "I mean" naturally
@@ -297,8 +304,8 @@ class HumanLikeVariations:
297
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
298
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
299
 
300
- # Add subtle errors that humans make (8% chance)
301
- if random.random() < 0.08:
302
  words = sentence.split()
303
  if len(words) > 5:
304
  # Common comma omissions
@@ -311,8 +318,8 @@ class HumanLikeVariations:
311
  words.insert(idx+1, words[idx])
312
  sentence = ' '.join(words)
313
 
314
- # Natural sentence combinations (20% chance)
315
- if i < len(sentences) - 1 and random.random() < 0.2:
316
  next_sent = sentences[i+1].strip()
317
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
318
  # Natural connectors based on content
@@ -544,11 +551,8 @@ class EnhancedDipperHumanizer:
544
  except:
545
  print("BART model not available")
546
  self.use_bart = False
547
-
548
- # Initialize human variations handler
549
- self.human_variations = HumanLikeVariations()
550
 
551
- def add_natural_human_patterns(self, text):
552
  """Add natural human writing patterns that Originality AI associates with human text"""
553
  sentences = self.split_into_sentences_advanced(text)
554
  result_sentences = []
@@ -560,8 +564,8 @@ class EnhancedDipperHumanizer:
560
  # Natural contractions throughout
561
  sentence = self.apply_contractions(sentence)
562
 
563
- # Add natural speech patterns (15% chance)
564
- if random.random() < 0.05 and len(sentence.split()) > 10:
565
  # Natural interruptions that humans actually use
566
  if random.random() < 0.5:
567
  # Add "you know" or "I mean" naturally
@@ -578,8 +582,8 @@ class EnhancedDipperHumanizer:
578
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
579
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
580
 
581
- # Add subtle errors that humans make (8% chance)
582
- if random.random() < 0.08:
583
  words = sentence.split()
584
  if len(words) > 5:
585
  # Common comma omissions
@@ -592,8 +596,8 @@ class EnhancedDipperHumanizer:
592
  words.insert(idx+1, words[idx])
593
  sentence = ' '.join(words)
594
 
595
- # Natural sentence combinations (20% chance)
596
- if i < len(sentences) - 1 and random.random() < 0.2:
597
  next_sent = sentences[i+1].strip()
598
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
599
  # Natural connectors based on content
@@ -860,7 +864,9 @@ class EnhancedDipperHumanizer:
860
 
861
  return text.strip()
862
 
863
- def paraphrase_with_dipper(self, text, lex_diversity=60, order_diversity=20):
 
 
864
  """Paraphrase text using Dipper model with sentence-level processing"""
865
  if not text or len(text.strip()) < 3:
866
  return text
@@ -878,16 +884,17 @@ class EnhancedDipperHumanizer:
878
  continue
879
 
880
  try:
881
- # ULTRA-HIGH diversity for Originality AI
882
  if len(sentence.split()) < 10:
883
- lex_diversity = 40 # Very high for short
884
- order_diversity = 15
 
885
  else:
886
- lex_diversity = 50 # Maximum diversity
887
- order_diversity = 20 # Maximum order diversity
888
 
889
- lex_code = int(100 - lex_diversity)
890
- order_code = int(100 - order_diversity)
891
 
892
  # Format input for Dipper
893
  if self.is_dipper:
@@ -913,11 +920,7 @@ class EnhancedDipperHumanizer:
913
 
914
  # Generate with appropriate variation
915
  original_length = len(sentence.split())
916
- max_new_length = int(original_length * 1.4)
917
-
918
- # High variation parameters
919
- temp = 0.8
920
- top_p_val = 0.9
921
 
922
  with torch.no_grad():
923
  outputs = self.model.generate(
@@ -925,9 +928,9 @@ class EnhancedDipperHumanizer:
925
  max_length=max_new_length + 20,
926
  min_length=max(5, int(original_length * 0.7)),
927
  do_sample=True,
928
- top_p=top_p_val,
929
- temperature=temp,
930
- no_repeat_ngram_size=4, # Allow more repetition for naturalness
931
  num_beams=1, # Greedy for more randomness
932
  early_stopping=True
933
  )
@@ -964,9 +967,6 @@ class EnhancedDipperHumanizer:
964
  # Join sentences back
965
  result = ' '.join(paraphrased_sentences)
966
 
967
- # Apply natural human patterns
968
- result = self.add_natural_human_patterns(result)
969
-
970
  return result
971
 
972
  def fix_incomplete_sentence_smart(self, generated, original):
@@ -1055,7 +1055,7 @@ class EnhancedDipperHumanizer:
1055
  # Clean up sentences
1056
  return [s for s in sentences if s and len(s.strip()) > 0]
1057
 
1058
- def paraphrase_with_bart(self, text):
1059
  """Additional paraphrasing with BART for more variation"""
1060
  if not self.use_bart or not text or len(text.strip()) < 3:
1061
  return text
@@ -1091,10 +1091,10 @@ class EnhancedDipperHumanizer:
1091
  **inputs,
1092
  max_length=int(original_length * 1.4) + 10,
1093
  min_length=max(5, int(original_length * 0.6)),
1094
- num_beams=2,
1095
- temperature=1.1, # Higher temperature
1096
  do_sample=True,
1097
- top_p=0.9,
1098
  early_stopping=True
1099
  )
1100
 
@@ -1116,7 +1116,8 @@ class EnhancedDipperHumanizer:
1116
  print(f"Error in BART paraphrasing: {str(e)}")
1117
  return text
1118
 
1119
- def apply_sentence_variation(self, text):
 
1120
  """Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
1121
  sentences = self.split_into_sentences_advanced(text)
1122
  varied_sentences = []
@@ -1132,7 +1133,7 @@ class EnhancedDipperHumanizer:
1132
  current_length = len(words)
1133
 
1134
  # Natural sentence length variation
1135
- if last_sentence_length > 20 and current_length > 20:
1136
  # Break up if two long sentences in a row
1137
  if ',' in sentence:
1138
  parts = sentence.split(',', 1)
@@ -1147,8 +1148,8 @@ class EnhancedDipperHumanizer:
1147
 
1148
  # Natural combinations for flow
1149
  if (i < len(sentences) - 1 and
1150
- current_length < 10 and
1151
- len(sentences[i+1].split()) < 10):
1152
 
1153
  next_sent = sentences[i+1].strip()
1154
  # Only combine if it makes semantic sense
@@ -1364,7 +1365,8 @@ class EnhancedDipperHumanizer:
1364
 
1365
  return html_text
1366
 
1367
- def add_natural_flow_variations(self, text):
 
1368
  """Add more natural flow and rhythm variations for Originality AI"""
1369
  sentences = self.split_into_sentences_advanced(text)
1370
  enhanced_sentences = []
@@ -1373,8 +1375,8 @@ class EnhancedDipperHumanizer:
1373
  if not sentence.strip():
1374
  continue
1375
 
1376
- # Add stream-of-consciousness elements (10% chance)
1377
- if random.random() < 0.03 and len(sentence.split()) > 10:
1378
  stream_elements = [
1379
  " - wait, let me back up - ",
1380
  " - actually, scratch that - ",
@@ -1388,8 +1390,8 @@ class EnhancedDipperHumanizer:
1388
  words.insert(pos, random.choice(stream_elements))
1389
  sentence = ' '.join(words)
1390
 
1391
- # Add human-like self-corrections (5% chance)
1392
- if random.random() < 0.05:
1393
  corrections = [
1394
  " - or rather, ",
1395
  " - well, actually, ",
@@ -1407,8 +1409,8 @@ class EnhancedDipperHumanizer:
1407
  words.insert(pos, correction)
1408
  sentence = ' '.join(words)
1409
 
1410
- # Add thinking-out-loud patterns (8% chance)
1411
- if random.random() < 0.08 and i > 0:
1412
  thinking_patterns = [
1413
  "Come to think of it, ",
1414
  "Actually, you know what? ",
@@ -1426,11 +1428,45 @@ class EnhancedDipperHumanizer:
1426
 
1427
  return ' '.join(enhanced_sentences)
1428
 
1429
- def process_html(self, html_content, progress_callback=None):
1430
- """Main processing function with progress callback"""
1431
  if not html_content.strip():
1432
  return "Please provide HTML content."
1433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1434
  # Store all script and style content to preserve it
1435
  script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
1436
  style_placeholder = "###STYLE_PLACEHOLDER_{}###"
@@ -1476,24 +1512,49 @@ class EnhancedDipperHumanizer:
1476
  if len(original_text.split()) < 3:
1477
  continue
1478
 
1479
- # First pass with Dipper
1480
  paraphrased_text = self.paraphrase_with_dipper(
1481
  original_text,
1482
- lex_diversity=60,
1483
- order_diversity=20
 
 
 
 
 
 
 
 
 
 
 
 
1484
  )
1485
 
1486
- # Second pass with BART for longer texts (increased probability)
1487
  if self.use_bart and len(paraphrased_text.split()) > 8:
1488
- # 50% chance to use BART for more variation
1489
- if random.random() < 0.2:
1490
- paraphrased_text = self.paraphrase_with_bart(paraphrased_text)
 
 
 
 
1491
 
1492
- # Apply sentence variation
1493
- paraphrased_text = self.apply_sentence_variation(paraphrased_text)
 
 
 
 
1494
 
1495
- # Add natural flow variations
1496
- paraphrased_text = self.add_natural_flow_variations(paraphrased_text)
 
 
 
 
 
1497
 
1498
  # Fix punctuation and formatting
1499
  paraphrased_text = self.fix_punctuation(paraphrased_text)
@@ -1658,8 +1719,24 @@ class EnhancedDipperHumanizer:
1658
  # Initialize the humanizer
1659
  humanizer = EnhancedDipperHumanizer()
1660
 
1661
- def humanize_html(html_input, progress=gr.Progress()):
1662
- """Gradio interface function with progress updates"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1663
  if not html_input:
1664
  return "Please provide HTML content to humanize."
1665
 
@@ -1671,10 +1748,33 @@ def humanize_html(html_input, progress=gr.Progress()):
1671
  if total > 0:
1672
  progress(current / total, desc=f"Processing: {current}/{total} elements")
1673
 
1674
- # Pass progress callback to process_html
1675
  result = humanizer.process_html(
1676
  html_input,
1677
- progress_callback=progress_callback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1678
  )
1679
 
1680
  processing_time = time.time() - start_time
@@ -1683,47 +1783,192 @@ def humanize_html(html_input, progress=gr.Progress()):
1683
 
1684
  return result
1685
 
1686
- # Create Gradio interface with queue
1687
- iface = gr.Interface(
1688
- fn=humanize_html,
1689
- inputs=[
1690
- gr.Textbox(
1691
- lines=10,
1692
- placeholder="Paste your HTML content here...",
1693
- label="HTML Input"
1694
- )
1695
- ],
1696
- outputs=gr.Textbox(
1697
- lines=10,
1698
- label="Humanized HTML Output"
1699
- ),
1700
- title="Enhanced Dipper AI Humanizer - Optimized for Originality AI",
1701
- description="""
1702
- Ultra-aggressive humanizer optimized to achieve 100% human scores on both Undetectable AI and Originality AI.
1703
 
1704
- Key Features:
1705
- - Maximum diversity settings (90% lexical, 40% order) for natural variation
1706
- - Enhanced human patterns: personal opinions, self-corrections, thinking-out-loud
1707
- - Natural typos, contractions, and conversational flow
1708
- - Stream-of-consciousness elements and rhetorical questions
1709
- - Originality AI-specific optimizations: varied sentence starters, emphatic repetitions
1710
- - Skips content in <strong>, <b>, and heading tags (including inside tables)
1711
- - Designed to pass the strictest AI detection systems
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1712
 
1713
- The tool creates genuinely human-like writing patterns that fool even the most sophisticated detectors!
 
 
 
1714
 
1715
- ⚠️ Note: Processing may take 5-10 minutes for large HTML documents.
1716
- """,
1717
- examples=[
1718
- ["""<article>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1719
  <h1>The Benefits of Regular Exercise</h1>
1720
  <div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
1721
  <p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
1722
  <p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
1723
- </article>"""]
1724
- ],
1725
- theme="default"
1726
- )
1727
 
1728
  if __name__ == "__main__":
1729
  # Enable queue for better handling of long-running processes
 
25
  class HumanLikeVariations:
26
  """Add human-like variations and intentional imperfections"""
27
 
28
+ def __init__(self, contraction_prob=0.8, oxford_comma_prob=0.15, which_that_prob=0.08,
29
+ typo_prob=0.02, natural_error_prob=0.05):
30
+ self.contraction_prob = contraction_prob
31
+ self.oxford_comma_prob = oxford_comma_prob
32
+ self.which_that_prob = which_that_prob
33
+ self.typo_prob = typo_prob
34
+ self.natural_error_prob = natural_error_prob
35
+
36
  # Common human writing patterns - EXPANDED for Originality AI
37
  self.casual_transitions = [
38
  "So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
 
165
  # Always use contractions where natural
166
  sent = self.apply_contractions(sent)
167
 
168
+ # Add VERY occasional natural errors (based on parameter)
169
+ if random.random() < self.natural_error_prob and len(sent.split()) > 15:
170
  error_types = [
171
  # Missing comma in compound sentence
172
  lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
 
206
  }
207
 
208
  for full, contr in contractions.items():
209
+ if random.random() < self.contraction_prob: # Use configurable probability
210
  text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
211
 
212
  return text
213
 
214
  def add_minor_errors(self, text):
215
  """Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
216
+ # Occasionally miss Oxford comma (based on parameter)
217
+ if random.random() < self.oxford_comma_prob:
218
  # Only in lists, not random commas
219
  text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
220
 
221
+ # Sometimes use 'which' instead of 'that' (based on parameter)
222
+ if random.random() < self.which_that_prob:
223
  # Only for non-restrictive clauses
224
  matches = re.finditer(r'\b(\w+) that (\w+)', text)
225
  for match in list(matches)[:1]: # Only first occurrence
226
  if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
227
  text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
228
 
229
+ # NEW: Add very occasional typos (based on parameter) - REDUCED AND CONTROLLED
230
  sentences = text.split('. ')
231
  for i, sent in enumerate(sentences):
232
+ if random.random() < self.typo_prob and len(sent.split()) > 15: # Only in longer sentences
233
  words = sent.split()
234
  # Pick a random word to potentially typo
235
  word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
 
274
 
275
  return text
276
 
277
+ def add_natural_human_patterns(self, text, speech_prob=0.15, error_prob=0.10, combine_prob=0.2):
278
  """Add natural human writing patterns that Originality AI associates with human text"""
279
  sentences = self.split_into_sentences_advanced(text)
280
  result_sentences = []
 
286
  # Natural contractions throughout
287
  sentence = self.apply_contractions(sentence)
288
 
289
+ # Add natural speech patterns (based on parameter)
290
+ if random.random() < speech_prob and len(sentence.split()) > 10:
291
  # Natural interruptions that humans actually use
292
  if random.random() < 0.5:
293
  # Add "you know" or "I mean" naturally
 
304
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
305
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
306
 
307
+ # Add subtle errors that humans make (based on parameter)
308
+ if random.random() < error_prob:
309
  words = sentence.split()
310
  if len(words) > 5:
311
  # Common comma omissions
 
318
  words.insert(idx+1, words[idx])
319
  sentence = ' '.join(words)
320
 
321
+ # Natural sentence combinations (based on parameter)
322
+ if i < len(sentences) - 1 and random.random() < combine_prob:
323
  next_sent = sentences[i+1].strip()
324
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
325
  # Natural connectors based on content
 
551
  except:
552
  print("BART model not available")
553
  self.use_bart = False
 
 
 
554
 
555
+ def add_natural_human_patterns(self, text, speech_prob=0.15, error_prob=0.10, combine_prob=0.2):
556
  """Add natural human writing patterns that Originality AI associates with human text"""
557
  sentences = self.split_into_sentences_advanced(text)
558
  result_sentences = []
 
564
  # Natural contractions throughout
565
  sentence = self.apply_contractions(sentence)
566
 
567
+ # Add natural speech patterns (based on parameter)
568
+ if random.random() < speech_prob and len(sentence.split()) > 10:
569
  # Natural interruptions that humans actually use
570
  if random.random() < 0.5:
571
  # Add "you know" or "I mean" naturally
 
582
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
583
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
584
 
585
+ # Add subtle errors that humans make (based on parameter)
586
+ if random.random() < error_prob:
587
  words = sentence.split()
588
  if len(words) > 5:
589
  # Common comma omissions
 
596
  words.insert(idx+1, words[idx])
597
  sentence = ' '.join(words)
598
 
599
+ # Natural sentence combinations (based on parameter)
600
+ if i < len(sentences) - 1 and random.random() < combine_prob:
601
  next_sent = sentences[i+1].strip()
602
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
603
  # Natural connectors based on content
 
864
 
865
  return text.strip()
866
 
867
+ def paraphrase_with_dipper(self, text, lex_diversity=75, order_diversity=30,
868
+ temperature=0.85, top_p=0.92, length_multiplier=1.4,
869
+ no_repeat_ngram=4):
870
  """Paraphrase text using Dipper model with sentence-level processing"""
871
  if not text or len(text.strip()) < 3:
872
  return text
 
884
  continue
885
 
886
  try:
887
+ # Apply diversity settings based on sentence length
888
  if len(sentence.split()) < 10:
889
+ # Use slightly lower diversity for short sentences
890
+ actual_lex = max(lex_diversity - 5, 50)
891
+ actual_order = max(order_diversity - 5, 15)
892
  else:
893
+ actual_lex = lex_diversity
894
+ actual_order = order_diversity
895
 
896
+ lex_code = int(100 - actual_lex)
897
+ order_code = int(100 - actual_order)
898
 
899
  # Format input for Dipper
900
  if self.is_dipper:
 
920
 
921
  # Generate with appropriate variation
922
  original_length = len(sentence.split())
923
+ max_new_length = int(original_length * length_multiplier)
 
 
 
 
924
 
925
  with torch.no_grad():
926
  outputs = self.model.generate(
 
928
  max_length=max_new_length + 20,
929
  min_length=max(5, int(original_length * 0.7)),
930
  do_sample=True,
931
+ top_p=top_p,
932
+ temperature=temperature,
933
+ no_repeat_ngram_size=no_repeat_ngram,
934
  num_beams=1, # Greedy for more randomness
935
  early_stopping=True
936
  )
 
967
  # Join sentences back
968
  result = ' '.join(paraphrased_sentences)
969
 
 
 
 
970
  return result
971
 
972
  def fix_incomplete_sentence_smart(self, generated, original):
 
1055
  # Clean up sentences
1056
  return [s for s in sentences if s and len(s.strip()) > 0]
1057
 
1058
+ def paraphrase_with_bart(self, text, bart_temperature=1.1, bart_top_p=0.9, bart_beams=2):
1059
  """Additional paraphrasing with BART for more variation"""
1060
  if not self.use_bart or not text or len(text.strip()) < 3:
1061
  return text
 
1091
  **inputs,
1092
  max_length=int(original_length * 1.4) + 10,
1093
  min_length=max(5, int(original_length * 0.6)),
1094
+ num_beams=bart_beams,
1095
+ temperature=bart_temperature,
1096
  do_sample=True,
1097
+ top_p=bart_top_p,
1098
  early_stopping=True
1099
  )
1100
 
 
1116
  print(f"Error in BART paraphrasing: {str(e)}")
1117
  return text
1118
 
1119
+ def apply_sentence_variation(self, text, long_sentence_threshold=20,
1120
+ short_sentence_threshold=10):
1121
  """Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
1122
  sentences = self.split_into_sentences_advanced(text)
1123
  varied_sentences = []
 
1133
  current_length = len(words)
1134
 
1135
  # Natural sentence length variation
1136
+ if last_sentence_length > long_sentence_threshold and current_length > long_sentence_threshold:
1137
  # Break up if two long sentences in a row
1138
  if ',' in sentence:
1139
  parts = sentence.split(',', 1)
 
1148
 
1149
  # Natural combinations for flow
1150
  if (i < len(sentences) - 1 and
1151
+ current_length < short_sentence_threshold and
1152
+ len(sentences[i+1].split()) < short_sentence_threshold):
1153
 
1154
  next_sent = sentences[i+1].strip()
1155
  # Only combine if it makes semantic sense
 
1365
 
1366
  return html_text
1367
 
1368
+ def add_natural_flow_variations(self, text, stream_prob=0.08, correction_prob=0.07,
1369
+ thinking_prob=0.10):
1370
  """Add more natural flow and rhythm variations for Originality AI"""
1371
  sentences = self.split_into_sentences_advanced(text)
1372
  enhanced_sentences = []
 
1375
  if not sentence.strip():
1376
  continue
1377
 
1378
+ # Add stream-of-consciousness elements (based on parameter)
1379
+ if random.random() < stream_prob and len(sentence.split()) > 10:
1380
  stream_elements = [
1381
  " - wait, let me back up - ",
1382
  " - actually, scratch that - ",
 
1390
  words.insert(pos, random.choice(stream_elements))
1391
  sentence = ' '.join(words)
1392
 
1393
+ # Add human-like self-corrections (based on parameter)
1394
+ if random.random() < correction_prob:
1395
  corrections = [
1396
  " - or rather, ",
1397
  " - well, actually, ",
 
1409
  words.insert(pos, correction)
1410
  sentence = ' '.join(words)
1411
 
1412
+ # Add thinking-out-loud patterns (based on parameter)
1413
+ if random.random() < thinking_prob and i > 0:
1414
  thinking_patterns = [
1415
  "Come to think of it, ",
1416
  "Actually, you know what? ",
 
1428
 
1429
  return ' '.join(enhanced_sentences)
1430
 
1431
+ def process_html(self, html_content, progress_callback=None, **kwargs):
1432
+ """Main processing function with progress callback and configurable parameters"""
1433
  if not html_content.strip():
1434
  return "Please provide HTML content."
1435
 
1436
+ # Extract all parameters with defaults
1437
+ lex_diversity = kwargs.get('lex_diversity', 75)
1438
+ order_diversity = kwargs.get('order_diversity', 30)
1439
+ temperature = kwargs.get('temperature', 0.85)
1440
+ top_p = kwargs.get('top_p', 0.92)
1441
+ length_multiplier = kwargs.get('length_multiplier', 1.4)
1442
+ no_repeat_ngram = kwargs.get('no_repeat_ngram', 4)
1443
+ bart_usage_prob = kwargs.get('bart_usage_prob', 0.3)
1444
+ bart_temperature = kwargs.get('bart_temperature', 1.1)
1445
+ bart_top_p = kwargs.get('bart_top_p', 0.9)
1446
+ bart_beams = kwargs.get('bart_beams', 2)
1447
+ contraction_prob = kwargs.get('contraction_prob', 0.8)
1448
+ oxford_comma_prob = kwargs.get('oxford_comma_prob', 0.15)
1449
+ which_that_prob = kwargs.get('which_that_prob', 0.08)
1450
+ typo_prob = kwargs.get('typo_prob', 0.02)
1451
+ natural_error_prob = kwargs.get('natural_error_prob', 0.05)
1452
+ speech_pattern_prob = kwargs.get('speech_pattern_prob', 0.15)
1453
+ subtle_error_prob = kwargs.get('subtle_error_prob', 0.10)
1454
+ sentence_combine_prob = kwargs.get('sentence_combine_prob', 0.2)
1455
+ stream_conscious_prob = kwargs.get('stream_conscious_prob', 0.08)
1456
+ self_correction_prob = kwargs.get('self_correction_prob', 0.07)
1457
+ thinking_loud_prob = kwargs.get('thinking_loud_prob', 0.10)
1458
+ long_sentence_threshold = kwargs.get('long_sentence_threshold', 20)
1459
+ short_sentence_threshold = kwargs.get('short_sentence_threshold', 10)
1460
+
1461
+ # Initialize human variations with parameters
1462
+ self.human_variations = HumanLikeVariations(
1463
+ contraction_prob=contraction_prob,
1464
+ oxford_comma_prob=oxford_comma_prob,
1465
+ which_that_prob=which_that_prob,
1466
+ typo_prob=typo_prob,
1467
+ natural_error_prob=natural_error_prob
1468
+ )
1469
+
1470
  # Store all script and style content to preserve it
1471
  script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
1472
  style_placeholder = "###STYLE_PLACEHOLDER_{}###"
 
1512
  if len(original_text.split()) < 3:
1513
  continue
1514
 
1515
+ # First pass with Dipper using configured parameters
1516
  paraphrased_text = self.paraphrase_with_dipper(
1517
  original_text,
1518
+ lex_diversity=lex_diversity,
1519
+ order_diversity=order_diversity,
1520
+ temperature=temperature,
1521
+ top_p=top_p,
1522
+ length_multiplier=length_multiplier,
1523
+ no_repeat_ngram=no_repeat_ngram
1524
+ )
1525
+
1526
+ # Add natural human patterns with configured probabilities
1527
+ paraphrased_text = self.add_natural_human_patterns(
1528
+ paraphrased_text,
1529
+ speech_prob=speech_pattern_prob,
1530
+ error_prob=subtle_error_prob,
1531
+ combine_prob=sentence_combine_prob
1532
  )
1533
 
1534
+ # Second pass with BART for longer texts (based on configured probability)
1535
  if self.use_bart and len(paraphrased_text.split()) > 8:
1536
+ if random.random() < bart_usage_prob:
1537
+ paraphrased_text = self.paraphrase_with_bart(
1538
+ paraphrased_text,
1539
+ bart_temperature=bart_temperature,
1540
+ bart_top_p=bart_top_p,
1541
+ bart_beams=bart_beams
1542
+ )
1543
 
1544
+ # Apply sentence variation with configured thresholds
1545
+ paraphrased_text = self.apply_sentence_variation(
1546
+ paraphrased_text,
1547
+ long_sentence_threshold=long_sentence_threshold,
1548
+ short_sentence_threshold=short_sentence_threshold
1549
+ )
1550
 
1551
+ # Add natural flow variations with configured probabilities
1552
+ paraphrased_text = self.add_natural_flow_variations(
1553
+ paraphrased_text,
1554
+ stream_prob=stream_conscious_prob,
1555
+ correction_prob=self_correction_prob,
1556
+ thinking_prob=thinking_loud_prob
1557
+ )
1558
 
1559
  # Fix punctuation and formatting
1560
  paraphrased_text = self.fix_punctuation(paraphrased_text)
 
1719
  # Initialize the humanizer
1720
  humanizer = EnhancedDipperHumanizer()
1721
 
1722
+ def humanize_html(html_input,
1723
+ # Diversity Settings
1724
+ lex_diversity=75, order_diversity=30,
1725
+ # Generation Parameters
1726
+ temperature=0.85, top_p=0.92, length_multiplier=1.4, no_repeat_ngram=4,
1727
+ # BART Parameters
1728
+ bart_usage_prob=0.3, bart_temperature=1.1, bart_top_p=0.9, bart_beams=2,
1729
+ # Human Variation Parameters
1730
+ contraction_prob=0.8, oxford_comma_prob=0.15, which_that_prob=0.08,
1731
+ typo_prob=0.02, natural_error_prob=0.05,
1732
+ # Human Pattern Frequencies
1733
+ speech_pattern_prob=0.15, subtle_error_prob=0.10, sentence_combine_prob=0.2,
1734
+ # Flow Variation Parameters
1735
+ stream_conscious_prob=0.08, self_correction_prob=0.07, thinking_loud_prob=0.10,
1736
+ # Sentence Variation Parameters
1737
+ long_sentence_threshold=20, short_sentence_threshold=10,
1738
+ progress=gr.Progress()):
1739
+ """Gradio interface function with progress updates and all parameters"""
1740
  if not html_input:
1741
  return "Please provide HTML content to humanize."
1742
 
 
1748
  if total > 0:
1749
  progress(current / total, desc=f"Processing: {current}/{total} elements")
1750
 
1751
+ # Pass all parameters to process_html
1752
  result = humanizer.process_html(
1753
  html_input,
1754
+ progress_callback=progress_callback,
1755
+ lex_diversity=lex_diversity,
1756
+ order_diversity=order_diversity,
1757
+ temperature=temperature,
1758
+ top_p=top_p,
1759
+ length_multiplier=length_multiplier,
1760
+ no_repeat_ngram=no_repeat_ngram,
1761
+ bart_usage_prob=bart_usage_prob,
1762
+ bart_temperature=bart_temperature,
1763
+ bart_top_p=bart_top_p,
1764
+ bart_beams=bart_beams,
1765
+ contraction_prob=contraction_prob,
1766
+ oxford_comma_prob=oxford_comma_prob,
1767
+ which_that_prob=which_that_prob,
1768
+ typo_prob=typo_prob,
1769
+ natural_error_prob=natural_error_prob,
1770
+ speech_pattern_prob=speech_pattern_prob,
1771
+ subtle_error_prob=subtle_error_prob,
1772
+ sentence_combine_prob=sentence_combine_prob,
1773
+ stream_conscious_prob=stream_conscious_prob,
1774
+ self_correction_prob=self_correction_prob,
1775
+ thinking_loud_prob=thinking_loud_prob,
1776
+ long_sentence_threshold=long_sentence_threshold,
1777
+ short_sentence_threshold=short_sentence_threshold
1778
  )
1779
 
1780
  processing_time = time.time() - start_time
 
1783
 
1784
  return result
1785
 
1786
# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
# ORDERING CONTRACT: the component list ``all_params``, every 23-element
# preset list in ``_PRESET_VALUES``, and the keyword parameters accepted by
# ``humanize_html`` must all use the same order:
#   [lex_diversity, order_diversity, temperature, top_p, length_multiplier,
#    no_repeat_ngram, bart_usage_prob, bart_temperature, bart_top_p,
#    bart_beams, contraction_prob, oxford_comma_prob, which_that_prob,
#    typo_prob, natural_error_prob, speech_pattern_prob, subtle_error_prob,
#    sentence_combine_prob, stream_conscious_prob, self_correction_prob,
#    thinking_loud_prob, long_sentence_threshold, short_sentence_threshold]
# If you add a slider, update all three places.
with gr.Blocks(title="Enhanced Dipper AI Humanizer - Fully Configurable") as iface:
    gr.Markdown("""
# Enhanced Dipper AI Humanizer - Optimized for Originality AI

Ultra-configurable humanizer with fine-grained control over all parameters.
Adjust settings to find the perfect balance between human score and content quality.
""")

    # Input pane (left) and output pane (right).
    with gr.Row():
        with gr.Column(scale=1):
            html_input = gr.Textbox(
                lines=10,
                placeholder="Paste your HTML content here...",
                label="HTML Input"
            )
            process_btn = gr.Button("Process HTML", variant="primary")

        with gr.Column(scale=1):
            html_output = gr.Textbox(
                lines=10,
                label="Humanized HTML Output"
            )

    # One tab per parameter group; each slider's default matches the
    # "balanced" preset below.
    with gr.Tabs():
        with gr.Tab("Diversity Settings"):
            gr.Markdown("**Controls how much the text is varied from the original**")
            lex_diversity = gr.Slider(
                0, 100, value=75, step=5,
                label="Lexical Diversity",
                info="Higher = more word variation (75 balanced, 90+ for max human score)"
            )
            order_diversity = gr.Slider(
                0, 100, value=30, step=5,
                label="Order Diversity",
                info="Higher = more word reordering (30 balanced, 40+ for max human score)"
            )

        with gr.Tab("Generation Parameters"):
            gr.Markdown("**Fine-tune the AI model's text generation behavior**")
            temperature = gr.Slider(
                0.1, 2.0, value=0.85, step=0.05,
                label="Temperature",
                info="Higher = more randomness (0.85 balanced, 0.9+ for max human)"
            )
            top_p = gr.Slider(
                0.1, 1.0, value=0.92, step=0.02,
                label="Top-p (nucleus sampling)",
                info="Higher = wider token selection (0.92 balanced, 0.95 for max human)"
            )
            length_multiplier = gr.Slider(
                1.1, 2.0, value=1.4, step=0.1,
                label="Length Multiplier",
                info="How much longer/shorter output can be vs input"
            )
            no_repeat_ngram = gr.Slider(
                2, 6, value=4, step=1,
                label="No Repeat N-gram Size",
                info="Prevents repetition of N-word phrases (4 is balanced)"
            )

        with gr.Tab("BART Parameters"):
            gr.Markdown("**Settings for secondary BART paraphrasing model**")
            bart_usage_prob = gr.Slider(
                0.0, 1.0, value=0.3, step=0.05,
                label="BART Usage Probability",
                info="Chance to use BART for additional variation"
            )
            bart_temperature = gr.Slider(
                0.7, 1.5, value=1.1, step=0.05,
                label="BART Temperature",
                info="Temperature for BART model"
            )
            bart_top_p = gr.Slider(
                0.8, 1.0, value=0.9, step=0.02,
                label="BART Top-p",
                info="Top-p for BART model"
            )
            bart_beams = gr.Slider(
                1, 4, value=2, step=1,
                label="BART Beam Size",
                info="Number of beams for BART generation"
            )

        with gr.Tab("Human Variations"):
            gr.Markdown("**Control natural human-like writing patterns**")
            contraction_prob = gr.Slider(
                0.0, 1.0, value=0.8, step=0.05,
                label="Contraction Probability",
                info="Chance to use contractions (it's vs it is)"
            )
            oxford_comma_prob = gr.Slider(
                0.0, 0.5, value=0.15, step=0.05,
                label="Oxford Comma Skip Probability",
                info="Chance to skip Oxford comma (human-like error)"
            )
            which_that_prob = gr.Slider(
                0.0, 0.3, value=0.08, step=0.02,
                label="Which/That Substitution",
                info="Chance to use 'which' instead of 'that'"
            )
            typo_prob = gr.Slider(
                0.0, 0.1, value=0.02, step=0.01,
                label="Typo Probability",
                info="Chance of natural typos per sentence"
            )
            natural_error_prob = gr.Slider(
                0.0, 0.2, value=0.05, step=0.01,
                label="Natural Error Probability",
                info="Chance of human-like errors (missing commas, etc)"
            )

        with gr.Tab("Human Pattern Frequencies"):
            gr.Markdown("**Frequency of conversational elements**")
            speech_pattern_prob = gr.Slider(
                0.0, 0.5, value=0.15, step=0.05,
                label="Speech Pattern Probability",
                info="Chance to add 'you know', 'I mean', etc."
            )
            subtle_error_prob = gr.Slider(
                0.0, 0.3, value=0.10, step=0.05,
                label="Subtle Error Probability",
                info="Chance of subtle human errors"
            )
            sentence_combine_prob = gr.Slider(
                0.0, 0.5, value=0.2, step=0.05,
                label="Sentence Combination Probability",
                info="Chance to naturally combine short sentences"
            )

        with gr.Tab("Flow Variations"):
            gr.Markdown("**Advanced human-like flow patterns**")
            stream_conscious_prob = gr.Slider(
                0.0, 0.3, value=0.08, step=0.02,
                label="Stream of Consciousness",
                info="Chance to add thinking interruptions"
            )
            self_correction_prob = gr.Slider(
                0.0, 0.2, value=0.07, step=0.02,
                label="Self-Correction Probability",
                info="Chance to add 'or rather', 'I mean' corrections"
            )
            thinking_loud_prob = gr.Slider(
                0.0, 0.3, value=0.10, step=0.02,
                label="Thinking Out Loud",
                info="Chance to add 'Come to think of it' patterns"
            )

        with gr.Tab("Sentence Structure"):
            gr.Markdown("**Control sentence length variation**")
            long_sentence_threshold = gr.Slider(
                10, 40, value=20, step=2,
                label="Long Sentence Threshold",
                info="Word count to consider a sentence 'long'"
            )
            short_sentence_threshold = gr.Slider(
                5, 15, value=10, step=1,
                label="Short Sentence Threshold",
                info="Word count to consider a sentence 'short'"
            )

    with gr.Accordion("Preset Configurations", open=False):
        gr.Markdown("""
### Quick Presets:
- **Balanced (Default)**: Current settings - good quality with high human score
- **Maximum Human**: Increase all diversity and variation parameters
- **Quality Focus**: Decrease variation parameters for cleaner output
- **Natural Flow**: Increase flow variations and speech patterns
""")

        with gr.Row():
            balanced_btn = gr.Button("Load Balanced", scale=1)
            max_human_btn = gr.Button("Load Max Human", scale=1)
            quality_btn = gr.Button("Load Quality Focus", scale=1)
            natural_btn = gr.Button("Load Natural Flow", scale=1)

    # Preset value tables: one 23-element list per preset, in the canonical
    # parameter order documented at the top of this section.  Keeping the
    # numbers in one table (instead of four hand-written return statements)
    # makes a missing/extra value a one-place fix.
    _PRESET_VALUES = {
        "balanced": [75, 30, 0.85, 0.92, 1.4, 4, 0.3, 1.1, 0.9, 2,
                     0.8, 0.15, 0.08, 0.02, 0.05, 0.15, 0.10, 0.2,
                     0.08, 0.07, 0.10, 20, 10],
        "max_human": [90, 40, 0.95, 0.95, 1.5, 4, 0.4, 1.2, 0.95, 2,
                      0.9, 0.20, 0.12, 0.04, 0.08, 0.25, 0.15, 0.3,
                      0.15, 0.10, 0.15, 20, 10],
        "quality": [65, 20, 0.75, 0.88, 1.3, 4, 0.2, 1.0, 0.85, 3,
                    0.7, 0.10, 0.05, 0.01, 0.03, 0.08, 0.05, 0.15,
                    0.03, 0.03, 0.05, 25, 8],
        "natural": [70, 25, 0.82, 0.90, 1.4, 4, 0.35, 1.1, 0.9, 2,
                    0.85, 0.12, 0.06, 0.02, 0.04, 0.20, 0.12, 0.25,
                    0.12, 0.10, 0.15, 18, 12],
    }

    def load_balanced():
        """Return the default preset: good quality with a high human score."""
        return list(_PRESET_VALUES["balanced"])

    def load_max_human():
        """Return the aggressive preset: maximize the human score."""
        return list(_PRESET_VALUES["max_human"])

    def load_quality():
        """Return the conservative preset: cleaner output, less variation."""
        return list(_PRESET_VALUES["quality"])

    def load_natural():
        """Return the conversational preset: more flow and speech patterns."""
        return list(_PRESET_VALUES["natural"])

    # All slider components, in the canonical parameter order (see contract
    # comment above).  Used both as preset outputs and as inputs to
    # ``humanize_html``.
    all_params = [
        lex_diversity, order_diversity, temperature, top_p, length_multiplier, no_repeat_ngram,
        bart_usage_prob, bart_temperature, bart_top_p, bart_beams,
        contraction_prob, oxford_comma_prob, which_that_prob, typo_prob, natural_error_prob,
        speech_pattern_prob, subtle_error_prob, sentence_combine_prob,
        stream_conscious_prob, self_correction_prob, thinking_loud_prob,
        long_sentence_threshold, short_sentence_threshold
    ]

    # Preset buttons bulk-assign every slider at once.
    balanced_btn.click(load_balanced, outputs=all_params)
    max_human_btn.click(load_max_human, outputs=all_params)
    quality_btn.click(load_quality, outputs=all_params)
    natural_btn.click(load_natural, outputs=all_params)

    # Main action: run the humanizer with the current slider values.
    process_btn.click(
        humanize_html,
        inputs=[html_input] + all_params,
        outputs=html_output
    )

    # Worked example the user can load with one click.
    gr.Examples(
        examples=[["""<article>
    <h1>The Benefits of Regular Exercise</h1>
    <div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
    <p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
    <p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
</article>"""]],
        inputs=html_input
    )
1972
 
1973
  if __name__ == "__main__":
1974
  # Enable queue for better handling of long-running processes