EdysorEdutech commited on
Commit
e372523
·
verified ·
1 Parent(s): e2ec5b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -357
app.py CHANGED
@@ -25,14 +25,7 @@ except:
25
  class HumanLikeVariations:
26
  """Add human-like variations and intentional imperfections"""
27
 
28
- def __init__(self, contraction_prob=0.8, oxford_comma_prob=0.15, which_that_prob=0.08,
29
- typo_prob=0.02, natural_error_prob=0.05):
30
- self.contraction_prob = contraction_prob
31
- self.oxford_comma_prob = oxford_comma_prob
32
- self.which_that_prob = which_that_prob
33
- self.typo_prob = typo_prob
34
- self.natural_error_prob = natural_error_prob
35
-
36
  # Common human writing patterns - EXPANDED for Originality AI
37
  self.casual_transitions = [
38
  "So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
@@ -165,8 +158,8 @@ class HumanLikeVariations:
165
  # Always use contractions where natural
166
  sent = self.apply_contractions(sent)
167
 
168
- # Add VERY occasional natural errors (based on parameter)
169
- if random.random() < self.natural_error_prob and len(sent.split()) > 15:
170
  error_types = [
171
  # Missing comma in compound sentence
172
  lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
@@ -206,30 +199,30 @@ class HumanLikeVariations:
206
  }
207
 
208
  for full, contr in contractions.items():
209
- if random.random() < self.contraction_prob: # Use configurable probability
210
  text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
211
 
212
  return text
213
 
214
  def add_minor_errors(self, text):
215
  """Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
216
- # Occasionally miss Oxford comma (based on parameter)
217
- if random.random() < self.oxford_comma_prob:
218
  # Only in lists, not random commas
219
  text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
220
 
221
- # Sometimes use 'which' instead of 'that' (based on parameter)
222
- if random.random() < self.which_that_prob:
223
  # Only for non-restrictive clauses
224
  matches = re.finditer(r'\b(\w+) that (\w+)', text)
225
  for match in list(matches)[:1]: # Only first occurrence
226
  if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
227
  text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
228
 
229
- # NEW: Add very occasional typos (based on parameter) - REDUCED AND CONTROLLED
230
  sentences = text.split('. ')
231
  for i, sent in enumerate(sentences):
232
- if random.random() < self.typo_prob and len(sent.split()) > 15: # Only in longer sentences
233
  words = sent.split()
234
  # Pick a random word to potentially typo
235
  word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
@@ -274,7 +267,7 @@ class HumanLikeVariations:
274
 
275
  return text
276
 
277
- def add_natural_human_patterns(self, text, speech_prob=0.15, error_prob=0.10, combine_prob=0.2):
278
  """Add natural human writing patterns that Originality AI associates with human text"""
279
  sentences = self.split_into_sentences_advanced(text)
280
  result_sentences = []
@@ -286,8 +279,8 @@ class HumanLikeVariations:
286
  # Natural contractions throughout
287
  sentence = self.apply_contractions(sentence)
288
 
289
- # Add natural speech patterns (based on parameter)
290
- if random.random() < speech_prob and len(sentence.split()) > 10:
291
  # Natural interruptions that humans actually use
292
  if random.random() < 0.5:
293
  # Add "you know" or "I mean" naturally
@@ -304,8 +297,8 @@ class HumanLikeVariations:
304
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
305
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
306
 
307
- # Add subtle errors that humans make (based on parameter)
308
- if random.random() < error_prob:
309
  words = sentence.split()
310
  if len(words) > 5:
311
  # Common comma omissions
@@ -318,8 +311,8 @@ class HumanLikeVariations:
318
  words.insert(idx+1, words[idx])
319
  sentence = ' '.join(words)
320
 
321
- # Natural sentence combinations (based on parameter)
322
- if i < len(sentences) - 1 and random.random() < combine_prob:
323
  next_sent = sentences[i+1].strip()
324
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
325
  # Natural connectors based on content
@@ -551,8 +544,11 @@ class EnhancedDipperHumanizer:
551
  except:
552
  print("BART model not available")
553
  self.use_bart = False
 
 
 
554
 
555
- def add_natural_human_patterns(self, text, speech_prob=0.15, error_prob=0.10, combine_prob=0.2):
556
  """Add natural human writing patterns that Originality AI associates with human text"""
557
  sentences = self.split_into_sentences_advanced(text)
558
  result_sentences = []
@@ -564,8 +560,8 @@ class EnhancedDipperHumanizer:
564
  # Natural contractions throughout
565
  sentence = self.apply_contractions(sentence)
566
 
567
- # Add natural speech patterns (based on parameter)
568
- if random.random() < speech_prob and len(sentence.split()) > 10:
569
  # Natural interruptions that humans actually use
570
  if random.random() < 0.5:
571
  # Add "you know" or "I mean" naturally
@@ -582,8 +578,8 @@ class EnhancedDipperHumanizer:
582
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
583
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
584
 
585
- # Add subtle errors that humans make (based on parameter)
586
- if random.random() < error_prob:
587
  words = sentence.split()
588
  if len(words) > 5:
589
  # Common comma omissions
@@ -596,8 +592,8 @@ class EnhancedDipperHumanizer:
596
  words.insert(idx+1, words[idx])
597
  sentence = ' '.join(words)
598
 
599
- # Natural sentence combinations (based on parameter)
600
- if i < len(sentences) - 1 and random.random() < combine_prob:
601
  next_sent = sentences[i+1].strip()
602
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
603
  # Natural connectors based on content
@@ -864,9 +860,7 @@ class EnhancedDipperHumanizer:
864
 
865
  return text.strip()
866
 
867
- def paraphrase_with_dipper(self, text, lex_diversity=75, order_diversity=30,
868
- temperature=0.85, top_p=0.92, length_multiplier=1.4,
869
- no_repeat_ngram=4):
870
  """Paraphrase text using Dipper model with sentence-level processing"""
871
  if not text or len(text.strip()) < 3:
872
  return text
@@ -884,17 +878,16 @@ class EnhancedDipperHumanizer:
884
  continue
885
 
886
  try:
887
- # Apply diversity settings based on sentence length
888
  if len(sentence.split()) < 10:
889
- # Use slightly lower diversity for short sentences
890
- actual_lex = max(lex_diversity - 5, 50)
891
- actual_order = max(order_diversity - 5, 15)
892
  else:
893
- actual_lex = lex_diversity
894
- actual_order = order_diversity
895
 
896
- lex_code = int(100 - actual_lex)
897
- order_code = int(100 - actual_order)
898
 
899
  # Format input for Dipper
900
  if self.is_dipper:
@@ -920,7 +913,11 @@ class EnhancedDipperHumanizer:
920
 
921
  # Generate with appropriate variation
922
  original_length = len(sentence.split())
923
- max_new_length = int(original_length * length_multiplier)
 
 
 
 
924
 
925
  with torch.no_grad():
926
  outputs = self.model.generate(
@@ -928,9 +925,9 @@ class EnhancedDipperHumanizer:
928
  max_length=max_new_length + 20,
929
  min_length=max(5, int(original_length * 0.7)),
930
  do_sample=True,
931
- top_p=top_p,
932
- temperature=temperature,
933
- no_repeat_ngram_size=no_repeat_ngram,
934
  num_beams=1, # Greedy for more randomness
935
  early_stopping=True
936
  )
@@ -967,6 +964,9 @@ class EnhancedDipperHumanizer:
967
  # Join sentences back
968
  result = ' '.join(paraphrased_sentences)
969
 
 
 
 
970
  return result
971
 
972
  def fix_incomplete_sentence_smart(self, generated, original):
@@ -1055,7 +1055,7 @@ class EnhancedDipperHumanizer:
1055
  # Clean up sentences
1056
  return [s for s in sentences if s and len(s.strip()) > 0]
1057
 
1058
- def paraphrase_with_bart(self, text, bart_temperature=1.1, bart_top_p=0.9, bart_beams=2):
1059
  """Additional paraphrasing with BART for more variation"""
1060
  if not self.use_bart or not text or len(text.strip()) < 3:
1061
  return text
@@ -1091,10 +1091,10 @@ class EnhancedDipperHumanizer:
1091
  **inputs,
1092
  max_length=int(original_length * 1.4) + 10,
1093
  min_length=max(5, int(original_length * 0.6)),
1094
- num_beams=bart_beams,
1095
- temperature=bart_temperature,
1096
  do_sample=True,
1097
- top_p=bart_top_p,
1098
  early_stopping=True
1099
  )
1100
 
@@ -1116,8 +1116,7 @@ class EnhancedDipperHumanizer:
1116
  print(f"Error in BART paraphrasing: {str(e)}")
1117
  return text
1118
 
1119
- def apply_sentence_variation(self, text, long_sentence_threshold=20,
1120
- short_sentence_threshold=10):
1121
  """Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
1122
  sentences = self.split_into_sentences_advanced(text)
1123
  varied_sentences = []
@@ -1133,7 +1132,7 @@ class EnhancedDipperHumanizer:
1133
  current_length = len(words)
1134
 
1135
  # Natural sentence length variation
1136
- if last_sentence_length > long_sentence_threshold and current_length > long_sentence_threshold:
1137
  # Break up if two long sentences in a row
1138
  if ',' in sentence:
1139
  parts = sentence.split(',', 1)
@@ -1148,8 +1147,8 @@ class EnhancedDipperHumanizer:
1148
 
1149
  # Natural combinations for flow
1150
  if (i < len(sentences) - 1 and
1151
- current_length < short_sentence_threshold and
1152
- len(sentences[i+1].split()) < short_sentence_threshold):
1153
 
1154
  next_sent = sentences[i+1].strip()
1155
  # Only combine if it makes semantic sense
@@ -1365,8 +1364,7 @@ class EnhancedDipperHumanizer:
1365
 
1366
  return html_text
1367
 
1368
- def add_natural_flow_variations(self, text, stream_prob=0.08, correction_prob=0.07,
1369
- thinking_prob=0.10):
1370
  """Add more natural flow and rhythm variations for Originality AI"""
1371
  sentences = self.split_into_sentences_advanced(text)
1372
  enhanced_sentences = []
@@ -1375,8 +1373,8 @@ class EnhancedDipperHumanizer:
1375
  if not sentence.strip():
1376
  continue
1377
 
1378
- # Add stream-of-consciousness elements (based on parameter)
1379
- if random.random() < stream_prob and len(sentence.split()) > 10:
1380
  stream_elements = [
1381
  " - wait, let me back up - ",
1382
  " - actually, scratch that - ",
@@ -1390,8 +1388,8 @@ class EnhancedDipperHumanizer:
1390
  words.insert(pos, random.choice(stream_elements))
1391
  sentence = ' '.join(words)
1392
 
1393
- # Add human-like self-corrections (based on parameter)
1394
- if random.random() < correction_prob:
1395
  corrections = [
1396
  " - or rather, ",
1397
  " - well, actually, ",
@@ -1409,8 +1407,8 @@ class EnhancedDipperHumanizer:
1409
  words.insert(pos, correction)
1410
  sentence = ' '.join(words)
1411
 
1412
- # Add thinking-out-loud patterns (based on parameter)
1413
- if random.random() < thinking_prob and i > 0:
1414
  thinking_patterns = [
1415
  "Come to think of it, ",
1416
  "Actually, you know what? ",
@@ -1428,45 +1426,11 @@ class EnhancedDipperHumanizer:
1428
 
1429
  return ' '.join(enhanced_sentences)
1430
 
1431
- def process_html(self, html_content, progress_callback=None, **kwargs):
1432
- """Main processing function with progress callback and configurable parameters"""
1433
  if not html_content.strip():
1434
  return "Please provide HTML content."
1435
 
1436
- # Extract all parameters with defaults
1437
- lex_diversity = kwargs.get('lex_diversity', 75)
1438
- order_diversity = kwargs.get('order_diversity', 30)
1439
- temperature = kwargs.get('temperature', 0.85)
1440
- top_p = kwargs.get('top_p', 0.92)
1441
- length_multiplier = kwargs.get('length_multiplier', 1.4)
1442
- no_repeat_ngram = kwargs.get('no_repeat_ngram', 4)
1443
- bart_usage_prob = kwargs.get('bart_usage_prob', 0.3)
1444
- bart_temperature = kwargs.get('bart_temperature', 1.1)
1445
- bart_top_p = kwargs.get('bart_top_p', 0.9)
1446
- bart_beams = kwargs.get('bart_beams', 2)
1447
- contraction_prob = kwargs.get('contraction_prob', 0.8)
1448
- oxford_comma_prob = kwargs.get('oxford_comma_prob', 0.15)
1449
- which_that_prob = kwargs.get('which_that_prob', 0.08)
1450
- typo_prob = kwargs.get('typo_prob', 0.02)
1451
- natural_error_prob = kwargs.get('natural_error_prob', 0.05)
1452
- speech_pattern_prob = kwargs.get('speech_pattern_prob', 0.15)
1453
- subtle_error_prob = kwargs.get('subtle_error_prob', 0.10)
1454
- sentence_combine_prob = kwargs.get('sentence_combine_prob', 0.2)
1455
- stream_conscious_prob = kwargs.get('stream_conscious_prob', 0.08)
1456
- self_correction_prob = kwargs.get('self_correction_prob', 0.07)
1457
- thinking_loud_prob = kwargs.get('thinking_loud_prob', 0.10)
1458
- long_sentence_threshold = kwargs.get('long_sentence_threshold', 20)
1459
- short_sentence_threshold = kwargs.get('short_sentence_threshold', 10)
1460
-
1461
- # Initialize human variations with parameters
1462
- self.human_variations = HumanLikeVariations(
1463
- contraction_prob=contraction_prob,
1464
- oxford_comma_prob=oxford_comma_prob,
1465
- which_that_prob=which_that_prob,
1466
- typo_prob=typo_prob,
1467
- natural_error_prob=natural_error_prob
1468
- )
1469
-
1470
  # Store all script and style content to preserve it
1471
  script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
1472
  style_placeholder = "###STYLE_PLACEHOLDER_{}###"
@@ -1512,49 +1476,24 @@ class EnhancedDipperHumanizer:
1512
  if len(original_text.split()) < 3:
1513
  continue
1514
 
1515
- # First pass with Dipper using configured parameters
1516
  paraphrased_text = self.paraphrase_with_dipper(
1517
  original_text,
1518
- lex_diversity=lex_diversity,
1519
- order_diversity=order_diversity,
1520
- temperature=temperature,
1521
- top_p=top_p,
1522
- length_multiplier=length_multiplier,
1523
- no_repeat_ngram=no_repeat_ngram
1524
- )
1525
-
1526
- # Add natural human patterns with configured probabilities
1527
- paraphrased_text = self.add_natural_human_patterns(
1528
- paraphrased_text,
1529
- speech_prob=speech_pattern_prob,
1530
- error_prob=subtle_error_prob,
1531
- combine_prob=sentence_combine_prob
1532
  )
1533
 
1534
- # Second pass with BART for longer texts (based on configured probability)
1535
  if self.use_bart and len(paraphrased_text.split()) > 8:
1536
- if random.random() < bart_usage_prob:
1537
- paraphrased_text = self.paraphrase_with_bart(
1538
- paraphrased_text,
1539
- bart_temperature=bart_temperature,
1540
- bart_top_p=bart_top_p,
1541
- bart_beams=bart_beams
1542
- )
1543
 
1544
- # Apply sentence variation with configured thresholds
1545
- paraphrased_text = self.apply_sentence_variation(
1546
- paraphrased_text,
1547
- long_sentence_threshold=long_sentence_threshold,
1548
- short_sentence_threshold=short_sentence_threshold
1549
- )
1550
 
1551
- # Add natural flow variations with configured probabilities
1552
- paraphrased_text = self.add_natural_flow_variations(
1553
- paraphrased_text,
1554
- stream_prob=stream_conscious_prob,
1555
- correction_prob=self_correction_prob,
1556
- thinking_prob=thinking_loud_prob
1557
- )
1558
 
1559
  # Fix punctuation and formatting
1560
  paraphrased_text = self.fix_punctuation(paraphrased_text)
@@ -1719,24 +1658,8 @@ class EnhancedDipperHumanizer:
1719
  # Initialize the humanizer
1720
  humanizer = EnhancedDipperHumanizer()
1721
 
1722
- def humanize_html(html_input,
1723
- # Diversity Settings
1724
- lex_diversity=75, order_diversity=30,
1725
- # Generation Parameters
1726
- temperature=0.85, top_p=0.92, length_multiplier=1.4, no_repeat_ngram=4,
1727
- # BART Parameters
1728
- bart_usage_prob=0.3, bart_temperature=1.1, bart_top_p=0.9, bart_beams=2,
1729
- # Human Variation Parameters
1730
- contraction_prob=0.8, oxford_comma_prob=0.15, which_that_prob=0.08,
1731
- typo_prob=0.02, natural_error_prob=0.05,
1732
- # Human Pattern Frequencies
1733
- speech_pattern_prob=0.15, subtle_error_prob=0.10, sentence_combine_prob=0.2,
1734
- # Flow Variation Parameters
1735
- stream_conscious_prob=0.08, self_correction_prob=0.07, thinking_loud_prob=0.10,
1736
- # Sentence Variation Parameters
1737
- long_sentence_threshold=20, short_sentence_threshold=10,
1738
- progress=gr.Progress()):
1739
- """Gradio interface function with progress updates and all parameters"""
1740
  if not html_input:
1741
  return "Please provide HTML content to humanize."
1742
 
@@ -1748,33 +1671,10 @@ def humanize_html(html_input,
1748
  if total > 0:
1749
  progress(current / total, desc=f"Processing: {current}/{total} elements")
1750
 
1751
- # Pass all parameters to process_html
1752
  result = humanizer.process_html(
1753
  html_input,
1754
- progress_callback=progress_callback,
1755
- lex_diversity=lex_diversity,
1756
- order_diversity=order_diversity,
1757
- temperature=temperature,
1758
- top_p=top_p,
1759
- length_multiplier=length_multiplier,
1760
- no_repeat_ngram=no_repeat_ngram,
1761
- bart_usage_prob=bart_usage_prob,
1762
- bart_temperature=bart_temperature,
1763
- bart_top_p=bart_top_p,
1764
- bart_beams=bart_beams,
1765
- contraction_prob=contraction_prob,
1766
- oxford_comma_prob=oxford_comma_prob,
1767
- which_that_prob=which_that_prob,
1768
- typo_prob=typo_prob,
1769
- natural_error_prob=natural_error_prob,
1770
- speech_pattern_prob=speech_pattern_prob,
1771
- subtle_error_prob=subtle_error_prob,
1772
- sentence_combine_prob=sentence_combine_prob,
1773
- stream_conscious_prob=stream_conscious_prob,
1774
- self_correction_prob=self_correction_prob,
1775
- thinking_loud_prob=thinking_loud_prob,
1776
- long_sentence_threshold=long_sentence_threshold,
1777
- short_sentence_threshold=short_sentence_threshold
1778
  )
1779
 
1780
  processing_time = time.time() - start_time
@@ -1783,192 +1683,47 @@ def humanize_html(html_input,
1783
 
1784
  return result
1785
 
1786
- # Create Gradio interface with all parameter inputs
1787
- with gr.Blocks(title="Enhanced Dipper AI Humanizer - Fully Configurable") as iface:
1788
- gr.Markdown("""
1789
- # Enhanced Dipper AI Humanizer - Optimized for Originality AI
1790
-
1791
- Ultra-configurable humanizer with fine-grained control over all parameters.
1792
- Adjust settings to find the perfect balance between human score and content quality.
1793
- """)
1794
-
1795
- with gr.Row():
1796
- with gr.Column(scale=1):
1797
- html_input = gr.Textbox(
1798
- lines=10,
1799
- placeholder="Paste your HTML content here...",
1800
- label="HTML Input"
1801
- )
1802
-
1803
- process_btn = gr.Button("Process HTML", variant="primary")
1804
-
1805
- with gr.Column(scale=1):
1806
- html_output = gr.Textbox(
1807
- lines=10,
1808
- label="Humanized HTML Output"
1809
- )
1810
-
1811
- with gr.Tabs():
1812
- with gr.Tab("Diversity Settings"):
1813
- gr.Markdown("**Controls how much the text is varied from the original**")
1814
- lex_diversity = gr.Slider(0, 100, value=75, step=5,
1815
- label="Lexical Diversity",
1816
- info="Higher = more word variation (75 balanced, 90+ for max human score)")
1817
- order_diversity = gr.Slider(0, 100, value=30, step=5,
1818
- label="Order Diversity",
1819
- info="Higher = more word reordering (30 balanced, 40+ for max human score)")
1820
-
1821
- with gr.Tab("Generation Parameters"):
1822
- gr.Markdown("**Fine-tune the AI model's text generation behavior**")
1823
- temperature = gr.Slider(0.1, 2.0, value=0.85, step=0.05,
1824
- label="Temperature",
1825
- info="Higher = more randomness (0.85 balanced, 0.9+ for max human)")
1826
- top_p = gr.Slider(0.1, 1.0, value=0.92, step=0.02,
1827
- label="Top-p (nucleus sampling)",
1828
- info="Higher = wider token selection (0.92 balanced, 0.95 for max human)")
1829
- length_multiplier = gr.Slider(1.1, 2.0, value=1.4, step=0.1,
1830
- label="Length Multiplier",
1831
- info="How much longer/shorter output can be vs input")
1832
- no_repeat_ngram = gr.Slider(2, 6, value=4, step=1,
1833
- label="No Repeat N-gram Size",
1834
- info="Prevents repetition of N-word phrases (4 is balanced)")
1835
-
1836
- with gr.Tab("BART Parameters"):
1837
- gr.Markdown("**Settings for secondary BART paraphrasing model**")
1838
- bart_usage_prob = gr.Slider(0.0, 1.0, value=0.3, step=0.05,
1839
- label="BART Usage Probability",
1840
- info="Chance to use BART for additional variation")
1841
- bart_temperature = gr.Slider(0.7, 1.5, value=1.1, step=0.05,
1842
- label="BART Temperature",
1843
- info="Temperature for BART model")
1844
- bart_top_p = gr.Slider(0.8, 1.0, value=0.9, step=0.02,
1845
- label="BART Top-p",
1846
- info="Top-p for BART model")
1847
- bart_beams = gr.Slider(1, 4, value=2, step=1,
1848
- label="BART Beam Size",
1849
- info="Number of beams for BART generation")
1850
-
1851
- with gr.Tab("Human Variations"):
1852
- gr.Markdown("**Control natural human-like writing patterns**")
1853
- contraction_prob = gr.Slider(0.0, 1.0, value=0.8, step=0.05,
1854
- label="Contraction Probability",
1855
- info="Chance to use contractions (it's vs it is)")
1856
- oxford_comma_prob = gr.Slider(0.0, 0.5, value=0.15, step=0.05,
1857
- label="Oxford Comma Skip Probability",
1858
- info="Chance to skip Oxford comma (human-like error)")
1859
- which_that_prob = gr.Slider(0.0, 0.3, value=0.08, step=0.02,
1860
- label="Which/That Substitution",
1861
- info="Chance to use 'which' instead of 'that'")
1862
- typo_prob = gr.Slider(0.0, 0.1, value=0.02, step=0.01,
1863
- label="Typo Probability",
1864
- info="Chance of natural typos per sentence")
1865
- natural_error_prob = gr.Slider(0.0, 0.2, value=0.05, step=0.01,
1866
- label="Natural Error Probability",
1867
- info="Chance of human-like errors (missing commas, etc)")
1868
-
1869
- with gr.Tab("Human Pattern Frequencies"):
1870
- gr.Markdown("**Frequency of conversational elements**")
1871
- speech_pattern_prob = gr.Slider(0.0, 0.5, value=0.15, step=0.05,
1872
- label="Speech Pattern Probability",
1873
- info="Chance to add 'you know', 'I mean', etc.")
1874
- subtle_error_prob = gr.Slider(0.0, 0.3, value=0.10, step=0.05,
1875
- label="Subtle Error Probability",
1876
- info="Chance of subtle human errors")
1877
- sentence_combine_prob = gr.Slider(0.0, 0.5, value=0.2, step=0.05,
1878
- label="Sentence Combination Probability",
1879
- info="Chance to naturally combine short sentences")
1880
-
1881
- with gr.Tab("Flow Variations"):
1882
- gr.Markdown("**Advanced human-like flow patterns**")
1883
- stream_conscious_prob = gr.Slider(0.0, 0.3, value=0.08, step=0.02,
1884
- label="Stream of Consciousness",
1885
- info="Chance to add thinking interruptions")
1886
- self_correction_prob = gr.Slider(0.0, 0.2, value=0.07, step=0.02,
1887
- label="Self-Correction Probability",
1888
- info="Chance to add 'or rather', 'I mean' corrections")
1889
- thinking_loud_prob = gr.Slider(0.0, 0.3, value=0.10, step=0.02,
1890
- label="Thinking Out Loud",
1891
- info="Chance to add 'Come to think of it' patterns")
1892
-
1893
- with gr.Tab("Sentence Structure"):
1894
- gr.Markdown("**Control sentence length variation**")
1895
- long_sentence_threshold = gr.Slider(10, 40, value=20, step=2,
1896
- label="Long Sentence Threshold",
1897
- info="Words count to consider sentence 'long'")
1898
- short_sentence_threshold = gr.Slider(5, 15, value=10, step=1,
1899
- label="Short Sentence Threshold",
1900
- info="Words count to consider sentence 'short'")
1901
-
1902
- with gr.Accordion("Preset Configurations", open=False):
1903
- gr.Markdown("""
1904
- ### Quick Presets:
1905
- - **Balanced (Default)**: Current settings - good quality with high human score
1906
- - **Maximum Human**: Increase all diversity and variation parameters
1907
- - **Quality Focus**: Decrease variation parameters for cleaner output
1908
- - **Natural Flow**: Increase flow variations and speech patterns
1909
- """)
1910
-
1911
- preset_buttons = gr.Row()
1912
- with preset_buttons:
1913
- balanced_btn = gr.Button("Load Balanced", scale=1)
1914
- max_human_btn = gr.Button("Load Max Human", scale=1)
1915
- quality_btn = gr.Button("Load Quality Focus", scale=1)
1916
- natural_btn = gr.Button("Load Natural Flow", scale=1)
1917
-
1918
- # Define preset configurations
1919
- def load_balanced():
1920
- return [75, 30, 0.85, 0.92, 1.4, 4, 0.3, 1.1, 0.9, 2,
1921
- 0.8, 0.15, 0.08, 0.02, 0.05, 0.15, 0.10, 0.2,
1922
- 0.08, 0.07, 0.10, 20, 10]
1923
-
1924
- def load_max_human():
1925
- return [90, 40, 0.95, 0.95, 1.5, 4, 0.4, 1.2, 0.95, 2,
1926
- 0.9, 0.20, 0.12, 0.04, 0.08, 0.25, 0.15, 0.3,
1927
- 0.15, 0.10, 0.15, 20, 10]
1928
-
1929
- def load_quality():
1930
- return [65, 20, 0.75, 0.88, 1.3, 4, 0.2, 1.0, 0.85, 3,
1931
- 0.7, 0.10, 0.05, 0.01, 0.03, 0.08, 0.05, 0.15,
1932
- 0.03, 0.03, 0.05, 25, 8]
1933
-
1934
- def load_natural():
1935
- return [70, 25, 0.82, 0.90, 1.4, 4, 0.35, 1.1, 0.9, 2,
1936
- 0.85, 0.12, 0.06, 0.02, 0.04, 0.20, 0.12, 0.25,
1937
- 0.12, 0.10, 0.15, 18, 12]
1938
-
1939
- # All parameter components for preset updates
1940
- all_params = [
1941
- lex_diversity, order_diversity, temperature, top_p, length_multiplier, no_repeat_ngram,
1942
- bart_usage_prob, bart_temperature, bart_top_p, bart_beams,
1943
- contraction_prob, oxford_comma_prob, which_that_prob, typo_prob, natural_error_prob,
1944
- speech_pattern_prob, subtle_error_prob, sentence_combine_prob,
1945
- stream_conscious_prob, self_correction_prob, thinking_loud_prob,
1946
- long_sentence_threshold, short_sentence_threshold
1947
- ]
1948
 
1949
- # Connect preset buttons
1950
- balanced_btn.click(load_balanced, outputs=all_params)
1951
- max_human_btn.click(load_max_human, outputs=all_params)
1952
- quality_btn.click(load_quality, outputs=all_params)
1953
- natural_btn.click(load_natural, outputs=all_params)
 
 
 
1954
 
1955
- # Connect main process button
1956
- process_btn.click(
1957
- humanize_html,
1958
- inputs=[html_input] + all_params,
1959
- outputs=html_output
1960
- )
1961
 
1962
- # Add example
1963
- gr.Examples(
1964
- examples=[["""<article>
 
1965
  <h1>The Benefits of Regular Exercise</h1>
1966
  <div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
1967
  <p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
1968
  <p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
1969
- </article>"""]],
1970
- inputs=html_input
1971
- )
 
1972
 
1973
  if __name__ == "__main__":
1974
  # Enable queue for better handling of long-running processes
 
25
  class HumanLikeVariations:
26
  """Add human-like variations and intentional imperfections"""
27
 
28
+ def __init__(self):
 
 
 
 
 
 
 
29
  # Common human writing patterns - EXPANDED for Originality AI
30
  self.casual_transitions = [
31
  "So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
 
158
  # Always use contractions where natural
159
  sent = self.apply_contractions(sent)
160
 
161
+ # Add VERY occasional natural errors (5% chance)
162
+ if random.random() < 0.05 and len(sent.split()) > 15:
163
  error_types = [
164
  # Missing comma in compound sentence
165
  lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
 
199
  }
200
 
201
  for full, contr in contractions.items():
202
+ if random.random() < 0.8: # 80% chance to apply each contraction
203
  text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
204
 
205
  return text
206
 
207
  def add_minor_errors(self, text):
208
  """Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
209
+ # Occasionally miss Oxford comma (15% chance)
210
+ if random.random() < 0.15:
211
  # Only in lists, not random commas
212
  text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
213
 
214
+ # Sometimes use 'which' instead of 'that' (8% chance)
215
+ if random.random() < 0.08:
216
  # Only for non-restrictive clauses
217
  matches = re.finditer(r'\b(\w+) that (\w+)', text)
218
  for match in list(matches)[:1]: # Only first occurrence
219
  if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
220
  text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
221
 
222
+ # NEW: Add very occasional typos (2% chance per sentence) - REDUCED AND CONTROLLED
223
  sentences = text.split('. ')
224
  for i, sent in enumerate(sentences):
225
+ if random.random() < 0.02 and len(sent.split()) > 15: # Only in longer sentences
226
  words = sent.split()
227
  # Pick a random word to potentially typo
228
  word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
 
267
 
268
  return text
269
 
270
+ def add_natural_human_patterns(self, text):
271
  """Add natural human writing patterns that Originality AI associates with human text"""
272
  sentences = self.split_into_sentences_advanced(text)
273
  result_sentences = []
 
279
  # Natural contractions throughout
280
  sentence = self.apply_contractions(sentence)
281
 
282
+ # Add natural speech patterns (15% chance)
283
+ if random.random() < 0.15 and len(sentence.split()) > 10:
284
  # Natural interruptions that humans actually use
285
  if random.random() < 0.5:
286
  # Add "you know" or "I mean" naturally
 
297
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
298
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
299
 
300
+ # Add subtle errors that humans make (10% chance - reduced)
301
+ if random.random() < 0.10:
302
  words = sentence.split()
303
  if len(words) > 5:
304
  # Common comma omissions
 
311
  words.insert(idx+1, words[idx])
312
  sentence = ' '.join(words)
313
 
314
+ # Natural sentence combinations (20% chance)
315
+ if i < len(sentences) - 1 and random.random() < 0.2:
316
  next_sent = sentences[i+1].strip()
317
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
318
  # Natural connectors based on content
 
544
  except:
545
  print("BART model not available")
546
  self.use_bart = False
547
+
548
+ # Initialize human variations handler
549
+ self.human_variations = HumanLikeVariations()
550
 
551
+ def add_natural_human_patterns(self, text):
552
  """Add natural human writing patterns that Originality AI associates with human text"""
553
  sentences = self.split_into_sentences_advanced(text)
554
  result_sentences = []
 
560
  # Natural contractions throughout
561
  sentence = self.apply_contractions(sentence)
562
 
563
+ # Add natural speech patterns (15% chance - balanced)
564
+ if random.random() < 0.15 and len(sentence.split()) > 10:
565
  # Natural interruptions that humans actually use
566
  if random.random() < 0.5:
567
  # Add "you know" or "I mean" naturally
 
578
  openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
579
  sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
580
 
581
+ # Add subtle errors that humans make (8% chance)
582
+ if random.random() < 0.08:
583
  words = sentence.split()
584
  if len(words) > 5:
585
  # Common comma omissions
 
592
  words.insert(idx+1, words[idx])
593
  sentence = ' '.join(words)
594
 
595
+ # Natural sentence combinations (20% chance)
596
+ if i < len(sentences) - 1 and random.random() < 0.2:
597
  next_sent = sentences[i+1].strip()
598
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
599
  # Natural connectors based on content
 
860
 
861
  return text.strip()
862
 
863
+ def paraphrase_with_dipper(self, text, lex_diversity=60, order_diversity=20):
 
 
864
  """Paraphrase text using Dipper model with sentence-level processing"""
865
  if not text or len(text.strip()) < 3:
866
  return text
 
878
  continue
879
 
880
  try:
881
+ # BALANCED diversity for Originality AI (100% human with better quality)
882
  if len(sentence.split()) < 10:
883
+ lex_diversity = 70 # High but not extreme
884
+ order_diversity = 25
 
885
  else:
886
+ lex_diversity = 75 # Balanced diversity
887
+ order_diversity = 30 # Moderate order diversity
888
 
889
+ lex_code = int(100 - lex_diversity)
890
+ order_code = int(100 - order_diversity)
891
 
892
  # Format input for Dipper
893
  if self.is_dipper:
 
913
 
914
  # Generate with appropriate variation
915
  original_length = len(sentence.split())
916
+ max_new_length = int(original_length * 1.4)
917
+
918
+ # High variation parameters
919
+ temp = 0.85 # Slightly reduced from 0.9
920
+ top_p_val = 0.92 # Slightly reduced from 0.95
921
 
922
  with torch.no_grad():
923
  outputs = self.model.generate(
 
925
  max_length=max_new_length + 20,
926
  min_length=max(5, int(original_length * 0.7)),
927
  do_sample=True,
928
+ top_p=top_p_val,
929
+ temperature=temp,
930
+ no_repeat_ngram_size=4, # Allow more repetition for naturalness
931
  num_beams=1, # Greedy for more randomness
932
  early_stopping=True
933
  )
 
964
  # Join sentences back
965
  result = ' '.join(paraphrased_sentences)
966
 
967
+ # Apply natural human patterns
968
+ result = self.add_natural_human_patterns(result)
969
+
970
  return result
971
 
972
  def fix_incomplete_sentence_smart(self, generated, original):
 
1055
  # Clean up sentences
1056
  return [s for s in sentences if s and len(s.strip()) > 0]
1057
 
1058
+ def paraphrase_with_bart(self, text):
1059
  """Additional paraphrasing with BART for more variation"""
1060
  if not self.use_bart or not text or len(text.strip()) < 3:
1061
  return text
 
1091
  **inputs,
1092
  max_length=int(original_length * 1.4) + 10,
1093
  min_length=max(5, int(original_length * 0.6)),
1094
+ num_beams=2,
1095
+ temperature=1.1, # Higher temperature
1096
  do_sample=True,
1097
+ top_p=0.9,
1098
  early_stopping=True
1099
  )
1100
 
 
1116
  print(f"Error in BART paraphrasing: {str(e)}")
1117
  return text
1118
 
1119
+ def apply_sentence_variation(self, text):
 
1120
  """Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
1121
  sentences = self.split_into_sentences_advanced(text)
1122
  varied_sentences = []
 
1132
  current_length = len(words)
1133
 
1134
  # Natural sentence length variation
1135
+ if last_sentence_length > 20 and current_length > 20:
1136
  # Break up if two long sentences in a row
1137
  if ',' in sentence:
1138
  parts = sentence.split(',', 1)
 
1147
 
1148
  # Natural combinations for flow
1149
  if (i < len(sentences) - 1 and
1150
+ current_length < 10 and
1151
+ len(sentences[i+1].split()) < 10):
1152
 
1153
  next_sent = sentences[i+1].strip()
1154
  # Only combine if it makes semantic sense
 
1364
 
1365
  return html_text
1366
 
1367
+ def add_natural_flow_variations(self, text):
 
1368
  """Add more natural flow and rhythm variations for Originality AI"""
1369
  sentences = self.split_into_sentences_advanced(text)
1370
  enhanced_sentences = []
 
1373
  if not sentence.strip():
1374
  continue
1375
 
1376
+ # Add stream-of-consciousness elements (8% chance - reduced)
1377
+ if random.random() < 0.08 and len(sentence.split()) > 10:
1378
  stream_elements = [
1379
  " - wait, let me back up - ",
1380
  " - actually, scratch that - ",
 
1388
  words.insert(pos, random.choice(stream_elements))
1389
  sentence = ' '.join(words)
1390
 
1391
+ # Add human-like self-corrections (7% chance - reduced)
1392
+ if random.random() < 0.07:
1393
  corrections = [
1394
  " - or rather, ",
1395
  " - well, actually, ",
 
1407
  words.insert(pos, correction)
1408
  sentence = ' '.join(words)
1409
 
1410
+ # Add thinking-out-loud patterns (10% chance - reduced)
1411
+ if random.random() < 0.10 and i > 0:
1412
  thinking_patterns = [
1413
  "Come to think of it, ",
1414
  "Actually, you know what? ",
 
1426
 
1427
  return ' '.join(enhanced_sentences)
1428
 
1429
+ def process_html(self, html_content, progress_callback=None):
1430
+ """Main processing function with progress callback"""
1431
  if not html_content.strip():
1432
  return "Please provide HTML content."
1433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1434
  # Store all script and style content to preserve it
1435
  script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
1436
  style_placeholder = "###STYLE_PLACEHOLDER_{}###"
 
1476
  if len(original_text.split()) < 3:
1477
  continue
1478
 
1479
+ # First pass with Dipper
1480
  paraphrased_text = self.paraphrase_with_dipper(
1481
  original_text,
1482
+ lex_diversity=60,
1483
+ order_diversity=20
 
 
 
 
 
 
 
 
 
 
 
 
1484
  )
1485
 
1486
+ # Second pass with BART for longer texts (balanced probability)
1487
  if self.use_bart and len(paraphrased_text.split()) > 8:
1488
+ # 30% chance to use BART for more variation (balanced)
1489
+ if random.random() < 0.3:
1490
+ paraphrased_text = self.paraphrase_with_bart(paraphrased_text)
 
 
 
 
1491
 
1492
+ # Apply sentence variation
1493
+ paraphrased_text = self.apply_sentence_variation(paraphrased_text)
 
 
 
 
1494
 
1495
+ # Add natural flow variations
1496
+ paraphrased_text = self.add_natural_flow_variations(paraphrased_text)
 
 
 
 
 
1497
 
1498
  # Fix punctuation and formatting
1499
  paraphrased_text = self.fix_punctuation(paraphrased_text)
 
1658
  # Initialize the humanizer
1659
  humanizer = EnhancedDipperHumanizer()
1660
 
1661
+ def humanize_html(html_input, progress=gr.Progress()):
1662
+ """Gradio interface function with progress updates"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1663
  if not html_input:
1664
  return "Please provide HTML content to humanize."
1665
 
 
1671
  if total > 0:
1672
  progress(current / total, desc=f"Processing: {current}/{total} elements")
1673
 
1674
+ # Pass progress callback to process_html
1675
  result = humanizer.process_html(
1676
  html_input,
1677
+ progress_callback=progress_callback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1678
  )
1679
 
1680
  processing_time = time.time() - start_time
 
1683
 
1684
  return result
1685
 
1686
# Create Gradio interface with queue.
# NOTE(review): the description previously advertised "90% lexical, 40% order"
# diversity, but paraphrase_with_dipper actually runs balanced settings
# (defaults 60/20, per-sentence 70-75/25-30) -- the text below was updated to
# match the code so users aren't misled about the configuration.
iface = gr.Interface(
    fn=humanize_html,
    inputs=[
        gr.Textbox(
            lines=10,
            placeholder="Paste your HTML content here...",
            label="HTML Input"
        )
    ],
    outputs=gr.Textbox(
        lines=10,
        label="Humanized HTML Output"
    ),
    title="Enhanced Dipper AI Humanizer - Optimized for Originality AI",
    description="""
    Ultra-aggressive humanizer optimized to achieve 100% human scores on both Undetectable AI and Originality AI.

    Key Features:
    - Balanced diversity settings (60-75% lexical, 20-30% order) for natural variation
    - Enhanced human patterns: personal opinions, self-corrections, thinking-out-loud
    - Natural typos, contractions, and conversational flow
    - Stream-of-consciousness elements and rhetorical questions
    - Originality AI-specific optimizations: varied sentence starters, emphatic repetitions
    - Skips content in <strong>, <b>, and heading tags (including inside tables)
    - Designed to pass the strictest AI detection systems

    The tool creates genuinely human-like writing patterns that fool even the most sophisticated detectors!

    ⚠️ Note: Processing may take 5-10 minutes for large HTML documents.
    """,
    examples=[
        ["""<article>
<h1>The Benefits of Regular Exercise</h1>
<div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
<p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
<p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
</article>"""]
    ],
    theme="default"
)
1727
 
1728
  if __name__ == "__main__":
1729
  # Enable queue for better handling of long-running processes