EdysorEdutech commited on
Commit
e2ec5b0
·
verified ·
1 Parent(s): 08270d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +357 -112
app.py CHANGED
@@ -25,7 +25,14 @@ except:
25
  class HumanLikeVariations:
26
  """Add human-like variations and intentional imperfections"""
27
 
28
- def __init__(self):
 
 
 
 
 
 
 
29
  # Common human writing patterns - EXPANDED for Originality AI
30
  self.casual_transitions = [
31
  "So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
@@ -158,8 +165,8 @@ class HumanLikeVariations:
158
  # Always use contractions where natural
159
  sent = self.apply_contractions(sent)
160
 
161
- # Add VERY occasional natural errors (5% chance)
162
- if random.random() < 0.05 and len(sent.split()) > 15:
163
  error_types = [
164
  # Missing comma in compound sentence
165
  lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
@@ -199,30 +206,30 @@ class HumanLikeVariations:
199
  }
200
 
201
  for full, contr in contractions.items():
202
- if random.random() < 0.8: # 80% chance to apply each contraction
203
  text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
204
 
205
  return text
206
 
207
  def add_minor_errors(self, text):
208
  """Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
209
- # Occasionally miss Oxford comma (15% chance)
210
- if random.random() < 0.15:
211
  # Only in lists, not random commas
212
  text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
213
 
214
- # Sometimes use 'which' instead of 'that' (8% chance)
215
- if random.random() < 0.08:
216
  # Only for non-restrictive clauses
217
  matches = re.finditer(r'\b(\w+) that (\w+)', text)
218
  for match in list(matches)[:1]: # Only first occurrence
219
  if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
220
  text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
221
 
222
- # NEW: Add very occasional typos (2% chance per sentence) - REDUCED AND CONTROLLED
223
  sentences = text.split('. ')
224
  for i, sent in enumerate(sentences):
225
- if random.random() < 0.02 and len(sent.split()) > 15: # Only in longer sentences
226
  words = sent.split()
227
  # Pick a random word to potentially typo
228
  word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
@@ -267,7 +274,7 @@ class HumanLikeVariations:
267
 
268
  return text
269
 
270
- def add_natural_human_patterns(self, text):
271
  """Add natural human writing patterns that Originality AI associates with human text"""
272
  sentences = self.split_into_sentences_advanced(text)
273
  result_sentences = []
@@ -279,8 +286,8 @@ class HumanLikeVariations:
279
  # Natural contractions throughout
280
  sentence = self.apply_contractions(sentence)
281
 
282
- # Add natural speech patterns (15% chance)
283
- if random.random() < 0.15 and len(sentence.split()) > 10:
284
  # Natural interruptions that humans actually use
285
  if random.random() < 0.5:
286
  # Add "you know" or "I mean" naturally
@@ -297,8 +304,8 @@ class HumanLikeVariations:
297
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
298
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
299
 
300
- # Add subtle errors that humans make (8% chance)
301
- if random.random() < 0.08:
302
  words = sentence.split()
303
  if len(words) > 5:
304
  # Common comma omissions
@@ -311,8 +318,8 @@ class HumanLikeVariations:
311
  words.insert(idx+1, words[idx])
312
  sentence = ' '.join(words)
313
 
314
- # Natural sentence combinations (20% chance)
315
- if i < len(sentences) - 1 and random.random() < 0.2:
316
  next_sent = sentences[i+1].strip()
317
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
318
  # Natural connectors based on content
@@ -544,11 +551,8 @@ class EnhancedDipperHumanizer:
544
  except:
545
  print("BART model not available")
546
  self.use_bart = False
547
-
548
- # Initialize human variations handler
549
- self.human_variations = HumanLikeVariations()
550
 
551
- def add_natural_human_patterns(self, text):
552
  """Add natural human writing patterns that Originality AI associates with human text"""
553
  sentences = self.split_into_sentences_advanced(text)
554
  result_sentences = []
@@ -560,8 +564,8 @@ class EnhancedDipperHumanizer:
560
  # Natural contractions throughout
561
  sentence = self.apply_contractions(sentence)
562
 
563
- # Add natural speech patterns (15% chance)
564
- if random.random() < 0.05 and len(sentence.split()) > 10:
565
  # Natural interruptions that humans actually use
566
  if random.random() < 0.5:
567
  # Add "you know" or "I mean" naturally
@@ -578,8 +582,8 @@ class EnhancedDipperHumanizer:
578
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
579
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
580
 
581
- # Add subtle errors that humans make (8% chance)
582
- if random.random() < 0.08:
583
  words = sentence.split()
584
  if len(words) > 5:
585
  # Common comma omissions
@@ -592,8 +596,8 @@ class EnhancedDipperHumanizer:
592
  words.insert(idx+1, words[idx])
593
  sentence = ' '.join(words)
594
 
595
- # Natural sentence combinations (20% chance)
596
- if i < len(sentences) - 1 and random.random() < 0.2:
597
  next_sent = sentences[i+1].strip()
598
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
599
  # Natural connectors based on content
@@ -860,7 +864,9 @@ class EnhancedDipperHumanizer:
860
 
861
  return text.strip()
862
 
863
- def paraphrase_with_dipper(self, text, lex_diversity=60, order_diversity=20):
 
 
864
  """Paraphrase text using Dipper model with sentence-level processing"""
865
  if not text or len(text.strip()) < 3:
866
  return text
@@ -878,16 +884,17 @@ class EnhancedDipperHumanizer:
878
  continue
879
 
880
  try:
881
- # ULTRA-HIGH diversity for Originality AI
882
  if len(sentence.split()) < 10:
883
- lex_diversity = 40 # Very high for short
884
- order_diversity = 15
 
885
  else:
886
- lex_diversity = 50 # Maximum diversity
887
- order_diversity = 20 # Maximum order diversity
888
 
889
- lex_code = int(100 - lex_diversity)
890
- order_code = int(100 - order_diversity)
891
 
892
  # Format input for Dipper
893
  if self.is_dipper:
@@ -913,11 +920,7 @@ class EnhancedDipperHumanizer:
913
 
914
  # Generate with appropriate variation
915
  original_length = len(sentence.split())
916
- max_new_length = int(original_length * 1.4)
917
-
918
- # High variation parameters
919
- temp = 0.8
920
- top_p_val = 0.9
921
 
922
  with torch.no_grad():
923
  outputs = self.model.generate(
@@ -925,9 +928,9 @@ class EnhancedDipperHumanizer:
925
  max_length=max_new_length + 20,
926
  min_length=max(5, int(original_length * 0.7)),
927
  do_sample=True,
928
- top_p=top_p_val,
929
- temperature=temp,
930
- no_repeat_ngram_size=4, # Allow more repetition for naturalness
931
  num_beams=1, # Greedy for more randomness
932
  early_stopping=True
933
  )
@@ -964,9 +967,6 @@ class EnhancedDipperHumanizer:
964
  # Join sentences back
965
  result = ' '.join(paraphrased_sentences)
966
 
967
- # Apply natural human patterns
968
- result = self.add_natural_human_patterns(result)
969
-
970
  return result
971
 
972
  def fix_incomplete_sentence_smart(self, generated, original):
@@ -1055,7 +1055,7 @@ class EnhancedDipperHumanizer:
1055
  # Clean up sentences
1056
  return [s for s in sentences if s and len(s.strip()) > 0]
1057
 
1058
- def paraphrase_with_bart(self, text):
1059
  """Additional paraphrasing with BART for more variation"""
1060
  if not self.use_bart or not text or len(text.strip()) < 3:
1061
  return text
@@ -1091,10 +1091,10 @@ class EnhancedDipperHumanizer:
1091
  **inputs,
1092
  max_length=int(original_length * 1.4) + 10,
1093
  min_length=max(5, int(original_length * 0.6)),
1094
- num_beams=2,
1095
- temperature=1.1, # Higher temperature
1096
  do_sample=True,
1097
- top_p=0.9,
1098
  early_stopping=True
1099
  )
1100
 
@@ -1116,7 +1116,8 @@ class EnhancedDipperHumanizer:
1116
  print(f"Error in BART paraphrasing: {str(e)}")
1117
  return text
1118
 
1119
- def apply_sentence_variation(self, text):
 
1120
  """Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
1121
  sentences = self.split_into_sentences_advanced(text)
1122
  varied_sentences = []
@@ -1132,7 +1133,7 @@ class EnhancedDipperHumanizer:
1132
  current_length = len(words)
1133
 
1134
  # Natural sentence length variation
1135
- if last_sentence_length > 20 and current_length > 20:
1136
  # Break up if two long sentences in a row
1137
  if ',' in sentence:
1138
  parts = sentence.split(',', 1)
@@ -1147,8 +1148,8 @@ class EnhancedDipperHumanizer:
1147
 
1148
  # Natural combinations for flow
1149
  if (i < len(sentences) - 1 and
1150
- current_length < 10 and
1151
- len(sentences[i+1].split()) < 10):
1152
 
1153
  next_sent = sentences[i+1].strip()
1154
  # Only combine if it makes semantic sense
@@ -1364,7 +1365,8 @@ class EnhancedDipperHumanizer:
1364
 
1365
  return html_text
1366
 
1367
- def add_natural_flow_variations(self, text):
 
1368
  """Add more natural flow and rhythm variations for Originality AI"""
1369
  sentences = self.split_into_sentences_advanced(text)
1370
  enhanced_sentences = []
@@ -1373,8 +1375,8 @@ class EnhancedDipperHumanizer:
1373
  if not sentence.strip():
1374
  continue
1375
 
1376
- # Add stream-of-consciousness elements (10% chance)
1377
- if random.random() < 0.03 and len(sentence.split()) > 10:
1378
  stream_elements = [
1379
  " - wait, let me back up - ",
1380
  " - actually, scratch that - ",
@@ -1388,8 +1390,8 @@ class EnhancedDipperHumanizer:
1388
  words.insert(pos, random.choice(stream_elements))
1389
  sentence = ' '.join(words)
1390
 
1391
- # Add human-like self-corrections (5% chance)
1392
- if random.random() < 0.05:
1393
  corrections = [
1394
  " - or rather, ",
1395
  " - well, actually, ",
@@ -1407,8 +1409,8 @@ class EnhancedDipperHumanizer:
1407
  words.insert(pos, correction)
1408
  sentence = ' '.join(words)
1409
 
1410
- # Add thinking-out-loud patterns (8% chance)
1411
- if random.random() < 0.08 and i > 0:
1412
  thinking_patterns = [
1413
  "Come to think of it, ",
1414
  "Actually, you know what? ",
@@ -1426,11 +1428,45 @@ class EnhancedDipperHumanizer:
1426
 
1427
  return ' '.join(enhanced_sentences)
1428
 
1429
- def process_html(self, html_content, progress_callback=None):
1430
- """Main processing function with progress callback"""
1431
  if not html_content.strip():
1432
  return "Please provide HTML content."
1433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1434
  # Store all script and style content to preserve it
1435
  script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
1436
  style_placeholder = "###STYLE_PLACEHOLDER_{}###"
@@ -1476,24 +1512,49 @@ class EnhancedDipperHumanizer:
1476
  if len(original_text.split()) < 3:
1477
  continue
1478
 
1479
- # First pass with Dipper
1480
  paraphrased_text = self.paraphrase_with_dipper(
1481
  original_text,
1482
- lex_diversity=60,
1483
- order_diversity=20
 
 
 
 
 
 
 
 
 
 
 
 
1484
  )
1485
 
1486
- # Second pass with BART for longer texts (increased probability)
1487
  if self.use_bart and len(paraphrased_text.split()) > 8:
1488
- # 50% chance to use BART for more variation
1489
- if random.random() < 0.2:
1490
- paraphrased_text = self.paraphrase_with_bart(paraphrased_text)
 
 
 
 
1491
 
1492
- # Apply sentence variation
1493
- paraphrased_text = self.apply_sentence_variation(paraphrased_text)
 
 
 
 
1494
 
1495
- # Add natural flow variations
1496
- paraphrased_text = self.add_natural_flow_variations(paraphrased_text)
 
 
 
 
 
1497
 
1498
  # Fix punctuation and formatting
1499
  paraphrased_text = self.fix_punctuation(paraphrased_text)
@@ -1658,8 +1719,24 @@ class EnhancedDipperHumanizer:
1658
  # Initialize the humanizer
1659
  humanizer = EnhancedDipperHumanizer()
1660
 
1661
- def humanize_html(html_input, progress=gr.Progress()):
1662
- """Gradio interface function with progress updates"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1663
  if not html_input:
1664
  return "Please provide HTML content to humanize."
1665
 
@@ -1671,10 +1748,33 @@ def humanize_html(html_input, progress=gr.Progress()):
1671
  if total > 0:
1672
  progress(current / total, desc=f"Processing: {current}/{total} elements")
1673
 
1674
- # Pass progress callback to process_html
1675
  result = humanizer.process_html(
1676
  html_input,
1677
- progress_callback=progress_callback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1678
  )
1679
 
1680
  processing_time = time.time() - start_time
@@ -1683,47 +1783,192 @@ def humanize_html(html_input, progress=gr.Progress()):
1683
 
1684
  return result
1685
 
1686
- # Create Gradio interface with queue
1687
- iface = gr.Interface(
1688
- fn=humanize_html,
1689
- inputs=[
1690
- gr.Textbox(
1691
- lines=10,
1692
- placeholder="Paste your HTML content here...",
1693
- label="HTML Input"
1694
- )
1695
- ],
1696
- outputs=gr.Textbox(
1697
- lines=10,
1698
- label="Humanized HTML Output"
1699
- ),
1700
- title="Enhanced Dipper AI Humanizer - Optimized for Originality AI",
1701
- description="""
1702
- Ultra-aggressive humanizer optimized to achieve 100% human scores on both Undetectable AI and Originality AI.
1703
 
1704
- Key Features:
1705
- - Maximum diversity settings (90% lexical, 40% order) for natural variation
1706
- - Enhanced human patterns: personal opinions, self-corrections, thinking-out-loud
1707
- - Natural typos, contractions, and conversational flow
1708
- - Stream-of-consciousness elements and rhetorical questions
1709
- - Originality AI-specific optimizations: varied sentence starters, emphatic repetitions
1710
- - Skips content in <strong>, <b>, and heading tags (including inside tables)
1711
- - Designed to pass the strictest AI detection systems
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1712
 
1713
- The tool creates genuinely human-like writing patterns that fool even the most sophisticated detectors!
 
 
 
1714
 
1715
- ⚠️ Note: Processing may take 5-10 minutes for large HTML documents.
1716
- """,
1717
- examples=[
1718
- ["""<article>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1719
  <h1>The Benefits of Regular Exercise</h1>
1720
  <div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
1721
  <p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
1722
  <p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
1723
- </article>"""]
1724
- ],
1725
- theme="default"
1726
- )
1727
 
1728
  if __name__ == "__main__":
1729
  # Enable queue for better handling of long-running processes
 
25
  class HumanLikeVariations:
26
  """Add human-like variations and intentional imperfections"""
27
 
28
+ def __init__(self, contraction_prob=0.8, oxford_comma_prob=0.15, which_that_prob=0.08,
29
+ typo_prob=0.02, natural_error_prob=0.05):
30
+ self.contraction_prob = contraction_prob
31
+ self.oxford_comma_prob = oxford_comma_prob
32
+ self.which_that_prob = which_that_prob
33
+ self.typo_prob = typo_prob
34
+ self.natural_error_prob = natural_error_prob
35
+
36
  # Common human writing patterns - EXPANDED for Originality AI
37
  self.casual_transitions = [
38
  "So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
 
165
  # Always use contractions where natural
166
  sent = self.apply_contractions(sent)
167
 
168
+ # Add VERY occasional natural errors (based on parameter)
169
+ if random.random() < self.natural_error_prob and len(sent.split()) > 15:
170
  error_types = [
171
  # Missing comma in compound sentence
172
  lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
 
206
  }
207
 
208
  for full, contr in contractions.items():
209
+ if random.random() < self.contraction_prob: # Use configurable probability
210
  text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
211
 
212
  return text
213
 
214
  def add_minor_errors(self, text):
215
  """Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
216
+ # Occasionally miss Oxford comma (based on parameter)
217
+ if random.random() < self.oxford_comma_prob:
218
  # Only in lists, not random commas
219
  text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
220
 
221
+ # Sometimes use 'which' instead of 'that' (based on parameter)
222
+ if random.random() < self.which_that_prob:
223
  # Only for non-restrictive clauses
224
  matches = re.finditer(r'\b(\w+) that (\w+)', text)
225
  for match in list(matches)[:1]: # Only first occurrence
226
  if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
227
  text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
228
 
229
+ # NEW: Add very occasional typos (based on parameter) - REDUCED AND CONTROLLED
230
  sentences = text.split('. ')
231
  for i, sent in enumerate(sentences):
232
+ if random.random() < self.typo_prob and len(sent.split()) > 15: # Only in longer sentences
233
  words = sent.split()
234
  # Pick a random word to potentially typo
235
  word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
 
274
 
275
  return text
276
 
277
+ def add_natural_human_patterns(self, text, speech_prob=0.15, error_prob=0.10, combine_prob=0.2):
278
  """Add natural human writing patterns that Originality AI associates with human text"""
279
  sentences = self.split_into_sentences_advanced(text)
280
  result_sentences = []
 
286
  # Natural contractions throughout
287
  sentence = self.apply_contractions(sentence)
288
 
289
+ # Add natural speech patterns (based on parameter)
290
+ if random.random() < speech_prob and len(sentence.split()) > 10:
291
  # Natural interruptions that humans actually use
292
  if random.random() < 0.5:
293
  # Add "you know" or "I mean" naturally
 
304
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
305
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
306
 
307
+ # Add subtle errors that humans make (based on parameter)
308
+ if random.random() < error_prob:
309
  words = sentence.split()
310
  if len(words) > 5:
311
  # Common comma omissions
 
318
  words.insert(idx+1, words[idx])
319
  sentence = ' '.join(words)
320
 
321
+ # Natural sentence combinations (based on parameter)
322
+ if i < len(sentences) - 1 and random.random() < combine_prob:
323
  next_sent = sentences[i+1].strip()
324
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
325
  # Natural connectors based on content
 
551
  except:
552
  print("BART model not available")
553
  self.use_bart = False
 
 
 
554
 
555
+ def add_natural_human_patterns(self, text, speech_prob=0.15, error_prob=0.10, combine_prob=0.2):
556
  """Add natural human writing patterns that Originality AI associates with human text"""
557
  sentences = self.split_into_sentences_advanced(text)
558
  result_sentences = []
 
564
  # Natural contractions throughout
565
  sentence = self.apply_contractions(sentence)
566
 
567
+ # Add natural speech patterns (based on parameter)
568
+ if random.random() < speech_prob and len(sentence.split()) > 10:
569
  # Natural interruptions that humans actually use
570
  if random.random() < 0.5:
571
  # Add "you know" or "I mean" naturally
 
582
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
583
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
584
 
585
+ # Add subtle errors that humans make (based on parameter)
586
+ if random.random() < error_prob:
587
  words = sentence.split()
588
  if len(words) > 5:
589
  # Common comma omissions
 
596
  words.insert(idx+1, words[idx])
597
  sentence = ' '.join(words)
598
 
599
+ # Natural sentence combinations (based on parameter)
600
+ if i < len(sentences) - 1 and random.random() < combine_prob:
601
  next_sent = sentences[i+1].strip()
602
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
603
  # Natural connectors based on content
 
864
 
865
  return text.strip()
866
 
867
+ def paraphrase_with_dipper(self, text, lex_diversity=75, order_diversity=30,
868
+ temperature=0.85, top_p=0.92, length_multiplier=1.4,
869
+ no_repeat_ngram=4):
870
  """Paraphrase text using Dipper model with sentence-level processing"""
871
  if not text or len(text.strip()) < 3:
872
  return text
 
884
  continue
885
 
886
  try:
887
+ # Apply diversity settings based on sentence length
888
  if len(sentence.split()) < 10:
889
+ # Use slightly lower diversity for short sentences
890
+ actual_lex = max(lex_diversity - 5, 50)
891
+ actual_order = max(order_diversity - 5, 15)
892
  else:
893
+ actual_lex = lex_diversity
894
+ actual_order = order_diversity
895
 
896
+ lex_code = int(100 - actual_lex)
897
+ order_code = int(100 - actual_order)
898
 
899
  # Format input for Dipper
900
  if self.is_dipper:
 
920
 
921
  # Generate with appropriate variation
922
  original_length = len(sentence.split())
923
+ max_new_length = int(original_length * length_multiplier)
 
 
 
 
924
 
925
  with torch.no_grad():
926
  outputs = self.model.generate(
 
928
  max_length=max_new_length + 20,
929
  min_length=max(5, int(original_length * 0.7)),
930
  do_sample=True,
931
+ top_p=top_p,
932
+ temperature=temperature,
933
+ no_repeat_ngram_size=no_repeat_ngram,
934
  num_beams=1, # Greedy for more randomness
935
  early_stopping=True
936
  )
 
967
  # Join sentences back
968
  result = ' '.join(paraphrased_sentences)
969
 
 
 
 
970
  return result
971
 
972
  def fix_incomplete_sentence_smart(self, generated, original):
 
1055
  # Clean up sentences
1056
  return [s for s in sentences if s and len(s.strip()) > 0]
1057
 
1058
+ def paraphrase_with_bart(self, text, bart_temperature=1.1, bart_top_p=0.9, bart_beams=2):
1059
  """Additional paraphrasing with BART for more variation"""
1060
  if not self.use_bart or not text or len(text.strip()) < 3:
1061
  return text
 
1091
  **inputs,
1092
  max_length=int(original_length * 1.4) + 10,
1093
  min_length=max(5, int(original_length * 0.6)),
1094
+ num_beams=bart_beams,
1095
+ temperature=bart_temperature,
1096
  do_sample=True,
1097
+ top_p=bart_top_p,
1098
  early_stopping=True
1099
  )
1100
 
 
1116
  print(f"Error in BART paraphrasing: {str(e)}")
1117
  return text
1118
 
1119
+ def apply_sentence_variation(self, text, long_sentence_threshold=20,
1120
+ short_sentence_threshold=10):
1121
  """Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
1122
  sentences = self.split_into_sentences_advanced(text)
1123
  varied_sentences = []
 
1133
  current_length = len(words)
1134
 
1135
  # Natural sentence length variation
1136
+ if last_sentence_length > long_sentence_threshold and current_length > long_sentence_threshold:
1137
  # Break up if two long sentences in a row
1138
  if ',' in sentence:
1139
  parts = sentence.split(',', 1)
 
1148
 
1149
  # Natural combinations for flow
1150
  if (i < len(sentences) - 1 and
1151
+ current_length < short_sentence_threshold and
1152
+ len(sentences[i+1].split()) < short_sentence_threshold):
1153
 
1154
  next_sent = sentences[i+1].strip()
1155
  # Only combine if it makes semantic sense
 
1365
 
1366
  return html_text
1367
 
1368
+ def add_natural_flow_variations(self, text, stream_prob=0.08, correction_prob=0.07,
1369
+ thinking_prob=0.10):
1370
  """Add more natural flow and rhythm variations for Originality AI"""
1371
  sentences = self.split_into_sentences_advanced(text)
1372
  enhanced_sentences = []
 
1375
  if not sentence.strip():
1376
  continue
1377
 
1378
+ # Add stream-of-consciousness elements (based on parameter)
1379
+ if random.random() < stream_prob and len(sentence.split()) > 10:
1380
  stream_elements = [
1381
  " - wait, let me back up - ",
1382
  " - actually, scratch that - ",
 
1390
  words.insert(pos, random.choice(stream_elements))
1391
  sentence = ' '.join(words)
1392
 
1393
+ # Add human-like self-corrections (based on parameter)
1394
+ if random.random() < correction_prob:
1395
  corrections = [
1396
  " - or rather, ",
1397
  " - well, actually, ",
 
1409
  words.insert(pos, correction)
1410
  sentence = ' '.join(words)
1411
 
1412
+ # Add thinking-out-loud patterns (based on parameter)
1413
+ if random.random() < thinking_prob and i > 0:
1414
  thinking_patterns = [
1415
  "Come to think of it, ",
1416
  "Actually, you know what? ",
 
1428
 
1429
  return ' '.join(enhanced_sentences)
1430
 
1431
+ def process_html(self, html_content, progress_callback=None, **kwargs):
1432
+ """Main processing function with progress callback and configurable parameters"""
1433
  if not html_content.strip():
1434
  return "Please provide HTML content."
1435
 
1436
+ # Extract all parameters with defaults
1437
+ lex_diversity = kwargs.get('lex_diversity', 75)
1438
+ order_diversity = kwargs.get('order_diversity', 30)
1439
+ temperature = kwargs.get('temperature', 0.85)
1440
+ top_p = kwargs.get('top_p', 0.92)
1441
+ length_multiplier = kwargs.get('length_multiplier', 1.4)
1442
+ no_repeat_ngram = kwargs.get('no_repeat_ngram', 4)
1443
+ bart_usage_prob = kwargs.get('bart_usage_prob', 0.3)
1444
+ bart_temperature = kwargs.get('bart_temperature', 1.1)
1445
+ bart_top_p = kwargs.get('bart_top_p', 0.9)
1446
+ bart_beams = kwargs.get('bart_beams', 2)
1447
+ contraction_prob = kwargs.get('contraction_prob', 0.8)
1448
+ oxford_comma_prob = kwargs.get('oxford_comma_prob', 0.15)
1449
+ which_that_prob = kwargs.get('which_that_prob', 0.08)
1450
+ typo_prob = kwargs.get('typo_prob', 0.02)
1451
+ natural_error_prob = kwargs.get('natural_error_prob', 0.05)
1452
+ speech_pattern_prob = kwargs.get('speech_pattern_prob', 0.15)
1453
+ subtle_error_prob = kwargs.get('subtle_error_prob', 0.10)
1454
+ sentence_combine_prob = kwargs.get('sentence_combine_prob', 0.2)
1455
+ stream_conscious_prob = kwargs.get('stream_conscious_prob', 0.08)
1456
+ self_correction_prob = kwargs.get('self_correction_prob', 0.07)
1457
+ thinking_loud_prob = kwargs.get('thinking_loud_prob', 0.10)
1458
+ long_sentence_threshold = kwargs.get('long_sentence_threshold', 20)
1459
+ short_sentence_threshold = kwargs.get('short_sentence_threshold', 10)
1460
+
1461
+ # Initialize human variations with parameters
1462
+ self.human_variations = HumanLikeVariations(
1463
+ contraction_prob=contraction_prob,
1464
+ oxford_comma_prob=oxford_comma_prob,
1465
+ which_that_prob=which_that_prob,
1466
+ typo_prob=typo_prob,
1467
+ natural_error_prob=natural_error_prob
1468
+ )
1469
+
1470
  # Store all script and style content to preserve it
1471
  script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
1472
  style_placeholder = "###STYLE_PLACEHOLDER_{}###"
 
1512
  if len(original_text.split()) < 3:
1513
  continue
1514
 
1515
+ # First pass with Dipper using configured parameters
1516
  paraphrased_text = self.paraphrase_with_dipper(
1517
  original_text,
1518
+ lex_diversity=lex_diversity,
1519
+ order_diversity=order_diversity,
1520
+ temperature=temperature,
1521
+ top_p=top_p,
1522
+ length_multiplier=length_multiplier,
1523
+ no_repeat_ngram=no_repeat_ngram
1524
+ )
1525
+
1526
+ # Add natural human patterns with configured probabilities
1527
+ paraphrased_text = self.add_natural_human_patterns(
1528
+ paraphrased_text,
1529
+ speech_prob=speech_pattern_prob,
1530
+ error_prob=subtle_error_prob,
1531
+ combine_prob=sentence_combine_prob
1532
  )
1533
 
1534
+ # Second pass with BART for longer texts (based on configured probability)
1535
  if self.use_bart and len(paraphrased_text.split()) > 8:
1536
+ if random.random() < bart_usage_prob:
1537
+ paraphrased_text = self.paraphrase_with_bart(
1538
+ paraphrased_text,
1539
+ bart_temperature=bart_temperature,
1540
+ bart_top_p=bart_top_p,
1541
+ bart_beams=bart_beams
1542
+ )
1543
 
1544
+ # Apply sentence variation with configured thresholds
1545
+ paraphrased_text = self.apply_sentence_variation(
1546
+ paraphrased_text,
1547
+ long_sentence_threshold=long_sentence_threshold,
1548
+ short_sentence_threshold=short_sentence_threshold
1549
+ )
1550
 
1551
+ # Add natural flow variations with configured probabilities
1552
+ paraphrased_text = self.add_natural_flow_variations(
1553
+ paraphrased_text,
1554
+ stream_prob=stream_conscious_prob,
1555
+ correction_prob=self_correction_prob,
1556
+ thinking_prob=thinking_loud_prob
1557
+ )
1558
 
1559
  # Fix punctuation and formatting
1560
  paraphrased_text = self.fix_punctuation(paraphrased_text)
 
1719
  # Initialize the humanizer
1720
  humanizer = EnhancedDipperHumanizer()
1721
 
1722
+ def humanize_html(html_input,
1723
+ # Diversity Settings
1724
+ lex_diversity=75, order_diversity=30,
1725
+ # Generation Parameters
1726
+ temperature=0.85, top_p=0.92, length_multiplier=1.4, no_repeat_ngram=4,
1727
+ # BART Parameters
1728
+ bart_usage_prob=0.3, bart_temperature=1.1, bart_top_p=0.9, bart_beams=2,
1729
+ # Human Variation Parameters
1730
+ contraction_prob=0.8, oxford_comma_prob=0.15, which_that_prob=0.08,
1731
+ typo_prob=0.02, natural_error_prob=0.05,
1732
+ # Human Pattern Frequencies
1733
+ speech_pattern_prob=0.15, subtle_error_prob=0.10, sentence_combine_prob=0.2,
1734
+ # Flow Variation Parameters
1735
+ stream_conscious_prob=0.08, self_correction_prob=0.07, thinking_loud_prob=0.10,
1736
+ # Sentence Variation Parameters
1737
+ long_sentence_threshold=20, short_sentence_threshold=10,
1738
+ progress=gr.Progress()):
1739
+ """Gradio interface function with progress updates and all parameters"""
1740
  if not html_input:
1741
  return "Please provide HTML content to humanize."
1742
 
 
1748
  if total > 0:
1749
  progress(current / total, desc=f"Processing: {current}/{total} elements")
1750
 
1751
+ # Pass all parameters to process_html
1752
  result = humanizer.process_html(
1753
  html_input,
1754
+ progress_callback=progress_callback,
1755
+ lex_diversity=lex_diversity,
1756
+ order_diversity=order_diversity,
1757
+ temperature=temperature,
1758
+ top_p=top_p,
1759
+ length_multiplier=length_multiplier,
1760
+ no_repeat_ngram=no_repeat_ngram,
1761
+ bart_usage_prob=bart_usage_prob,
1762
+ bart_temperature=bart_temperature,
1763
+ bart_top_p=bart_top_p,
1764
+ bart_beams=bart_beams,
1765
+ contraction_prob=contraction_prob,
1766
+ oxford_comma_prob=oxford_comma_prob,
1767
+ which_that_prob=which_that_prob,
1768
+ typo_prob=typo_prob,
1769
+ natural_error_prob=natural_error_prob,
1770
+ speech_pattern_prob=speech_pattern_prob,
1771
+ subtle_error_prob=subtle_error_prob,
1772
+ sentence_combine_prob=sentence_combine_prob,
1773
+ stream_conscious_prob=stream_conscious_prob,
1774
+ self_correction_prob=self_correction_prob,
1775
+ thinking_loud_prob=thinking_loud_prob,
1776
+ long_sentence_threshold=long_sentence_threshold,
1777
+ short_sentence_threshold=short_sentence_threshold
1778
  )
1779
 
1780
  processing_time = time.time() - start_time
 
1783
 
1784
  return result
1785
 
1786
# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
# ORDERING CONTRACT: the component list ``all_params``, every 23-element
# preset list in ``_PRESET_VALUES``, and the keyword parameters accepted by
# ``humanize_html`` must all use the same order:
#   [lex_diversity, order_diversity, temperature, top_p, length_multiplier,
#    no_repeat_ngram, bart_usage_prob, bart_temperature, bart_top_p,
#    bart_beams, contraction_prob, oxford_comma_prob, which_that_prob,
#    typo_prob, natural_error_prob, speech_pattern_prob, subtle_error_prob,
#    sentence_combine_prob, stream_conscious_prob, self_correction_prob,
#    thinking_loud_prob, long_sentence_threshold, short_sentence_threshold]
# If you add a slider, update all three places.
with gr.Blocks(title="Enhanced Dipper AI Humanizer - Fully Configurable") as iface:
    gr.Markdown("""
# Enhanced Dipper AI Humanizer - Optimized for Originality AI

Ultra-configurable humanizer with fine-grained control over all parameters.
Adjust settings to find the perfect balance between human score and content quality.
""")

    # Input pane (left) and output pane (right).
    with gr.Row():
        with gr.Column(scale=1):
            html_input = gr.Textbox(
                lines=10,
                placeholder="Paste your HTML content here...",
                label="HTML Input"
            )
            process_btn = gr.Button("Process HTML", variant="primary")

        with gr.Column(scale=1):
            html_output = gr.Textbox(
                lines=10,
                label="Humanized HTML Output"
            )

    # One tab per parameter group; each slider's default matches the
    # "balanced" preset below.
    with gr.Tabs():
        with gr.Tab("Diversity Settings"):
            gr.Markdown("**Controls how much the text is varied from the original**")
            lex_diversity = gr.Slider(
                0, 100, value=75, step=5,
                label="Lexical Diversity",
                info="Higher = more word variation (75 balanced, 90+ for max human score)"
            )
            order_diversity = gr.Slider(
                0, 100, value=30, step=5,
                label="Order Diversity",
                info="Higher = more word reordering (30 balanced, 40+ for max human score)"
            )

        with gr.Tab("Generation Parameters"):
            gr.Markdown("**Fine-tune the AI model's text generation behavior**")
            temperature = gr.Slider(
                0.1, 2.0, value=0.85, step=0.05,
                label="Temperature",
                info="Higher = more randomness (0.85 balanced, 0.9+ for max human)"
            )
            top_p = gr.Slider(
                0.1, 1.0, value=0.92, step=0.02,
                label="Top-p (nucleus sampling)",
                info="Higher = wider token selection (0.92 balanced, 0.95 for max human)"
            )
            length_multiplier = gr.Slider(
                1.1, 2.0, value=1.4, step=0.1,
                label="Length Multiplier",
                info="How much longer/shorter output can be vs input"
            )
            no_repeat_ngram = gr.Slider(
                2, 6, value=4, step=1,
                label="No Repeat N-gram Size",
                info="Prevents repetition of N-word phrases (4 is balanced)"
            )

        with gr.Tab("BART Parameters"):
            gr.Markdown("**Settings for secondary BART paraphrasing model**")
            bart_usage_prob = gr.Slider(
                0.0, 1.0, value=0.3, step=0.05,
                label="BART Usage Probability",
                info="Chance to use BART for additional variation"
            )
            bart_temperature = gr.Slider(
                0.7, 1.5, value=1.1, step=0.05,
                label="BART Temperature",
                info="Temperature for BART model"
            )
            bart_top_p = gr.Slider(
                0.8, 1.0, value=0.9, step=0.02,
                label="BART Top-p",
                info="Top-p for BART model"
            )
            bart_beams = gr.Slider(
                1, 4, value=2, step=1,
                label="BART Beam Size",
                info="Number of beams for BART generation"
            )

        with gr.Tab("Human Variations"):
            gr.Markdown("**Control natural human-like writing patterns**")
            contraction_prob = gr.Slider(
                0.0, 1.0, value=0.8, step=0.05,
                label="Contraction Probability",
                info="Chance to use contractions (it's vs it is)"
            )
            oxford_comma_prob = gr.Slider(
                0.0, 0.5, value=0.15, step=0.05,
                label="Oxford Comma Skip Probability",
                info="Chance to skip Oxford comma (human-like error)"
            )
            which_that_prob = gr.Slider(
                0.0, 0.3, value=0.08, step=0.02,
                label="Which/That Substitution",
                info="Chance to use 'which' instead of 'that'"
            )
            typo_prob = gr.Slider(
                0.0, 0.1, value=0.02, step=0.01,
                label="Typo Probability",
                info="Chance of natural typos per sentence"
            )
            natural_error_prob = gr.Slider(
                0.0, 0.2, value=0.05, step=0.01,
                label="Natural Error Probability",
                info="Chance of human-like errors (missing commas, etc)"
            )

        with gr.Tab("Human Pattern Frequencies"):
            gr.Markdown("**Frequency of conversational elements**")
            speech_pattern_prob = gr.Slider(
                0.0, 0.5, value=0.15, step=0.05,
                label="Speech Pattern Probability",
                info="Chance to add 'you know', 'I mean', etc."
            )
            subtle_error_prob = gr.Slider(
                0.0, 0.3, value=0.10, step=0.05,
                label="Subtle Error Probability",
                info="Chance of subtle human errors"
            )
            sentence_combine_prob = gr.Slider(
                0.0, 0.5, value=0.2, step=0.05,
                label="Sentence Combination Probability",
                info="Chance to naturally combine short sentences"
            )

        with gr.Tab("Flow Variations"):
            gr.Markdown("**Advanced human-like flow patterns**")
            stream_conscious_prob = gr.Slider(
                0.0, 0.3, value=0.08, step=0.02,
                label="Stream of Consciousness",
                info="Chance to add thinking interruptions"
            )
            self_correction_prob = gr.Slider(
                0.0, 0.2, value=0.07, step=0.02,
                label="Self-Correction Probability",
                info="Chance to add 'or rather', 'I mean' corrections"
            )
            thinking_loud_prob = gr.Slider(
                0.0, 0.3, value=0.10, step=0.02,
                label="Thinking Out Loud",
                info="Chance to add 'Come to think of it' patterns"
            )

        with gr.Tab("Sentence Structure"):
            gr.Markdown("**Control sentence length variation**")
            long_sentence_threshold = gr.Slider(
                10, 40, value=20, step=2,
                label="Long Sentence Threshold",
                info="Word count to consider a sentence 'long'"
            )
            short_sentence_threshold = gr.Slider(
                5, 15, value=10, step=1,
                label="Short Sentence Threshold",
                info="Word count to consider a sentence 'short'"
            )

    with gr.Accordion("Preset Configurations", open=False):
        gr.Markdown("""
### Quick Presets:
- **Balanced (Default)**: Current settings - good quality with high human score
- **Maximum Human**: Increase all diversity and variation parameters
- **Quality Focus**: Decrease variation parameters for cleaner output
- **Natural Flow**: Increase flow variations and speech patterns
""")

        with gr.Row():
            balanced_btn = gr.Button("Load Balanced", scale=1)
            max_human_btn = gr.Button("Load Max Human", scale=1)
            quality_btn = gr.Button("Load Quality Focus", scale=1)
            natural_btn = gr.Button("Load Natural Flow", scale=1)

    # Preset value tables: one 23-element list per preset, in the canonical
    # parameter order documented at the top of this section.  Keeping the
    # numbers in one table (instead of four hand-written return statements)
    # makes a missing/extra value a one-place fix.
    _PRESET_VALUES = {
        "balanced": [75, 30, 0.85, 0.92, 1.4, 4, 0.3, 1.1, 0.9, 2,
                     0.8, 0.15, 0.08, 0.02, 0.05, 0.15, 0.10, 0.2,
                     0.08, 0.07, 0.10, 20, 10],
        "max_human": [90, 40, 0.95, 0.95, 1.5, 4, 0.4, 1.2, 0.95, 2,
                      0.9, 0.20, 0.12, 0.04, 0.08, 0.25, 0.15, 0.3,
                      0.15, 0.10, 0.15, 20, 10],
        "quality": [65, 20, 0.75, 0.88, 1.3, 4, 0.2, 1.0, 0.85, 3,
                    0.7, 0.10, 0.05, 0.01, 0.03, 0.08, 0.05, 0.15,
                    0.03, 0.03, 0.05, 25, 8],
        "natural": [70, 25, 0.82, 0.90, 1.4, 4, 0.35, 1.1, 0.9, 2,
                    0.85, 0.12, 0.06, 0.02, 0.04, 0.20, 0.12, 0.25,
                    0.12, 0.10, 0.15, 18, 12],
    }

    def load_balanced():
        """Return the default preset: good quality with a high human score."""
        return list(_PRESET_VALUES["balanced"])

    def load_max_human():
        """Return the aggressive preset: maximize the human score."""
        return list(_PRESET_VALUES["max_human"])

    def load_quality():
        """Return the conservative preset: cleaner output, less variation."""
        return list(_PRESET_VALUES["quality"])

    def load_natural():
        """Return the conversational preset: more flow and speech patterns."""
        return list(_PRESET_VALUES["natural"])

    # All slider components, in the canonical parameter order (see contract
    # comment above).  Used both as preset outputs and as inputs to
    # ``humanize_html``.
    all_params = [
        lex_diversity, order_diversity, temperature, top_p, length_multiplier, no_repeat_ngram,
        bart_usage_prob, bart_temperature, bart_top_p, bart_beams,
        contraction_prob, oxford_comma_prob, which_that_prob, typo_prob, natural_error_prob,
        speech_pattern_prob, subtle_error_prob, sentence_combine_prob,
        stream_conscious_prob, self_correction_prob, thinking_loud_prob,
        long_sentence_threshold, short_sentence_threshold
    ]

    # Preset buttons bulk-assign every slider at once.
    balanced_btn.click(load_balanced, outputs=all_params)
    max_human_btn.click(load_max_human, outputs=all_params)
    quality_btn.click(load_quality, outputs=all_params)
    natural_btn.click(load_natural, outputs=all_params)

    # Main action: run the humanizer with the current slider values.
    process_btn.click(
        humanize_html,
        inputs=[html_input] + all_params,
        outputs=html_output
    )

    # Worked example the user can load with one click.
    gr.Examples(
        examples=[["""<article>
    <h1>The Benefits of Regular Exercise</h1>
    <div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
    <p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
    <p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
</article>"""]],
        inputs=html_input
    )
1972
 
1973
  if __name__ == "__main__":
1974
  # Enable queue for better handling of long-running processes