Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,7 +25,14 @@ except:
|
|
| 25 |
class HumanLikeVariations:
|
| 26 |
"""Add human-like variations and intentional imperfections"""
|
| 27 |
|
| 28 |
-
def __init__(self
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
# Common human writing patterns - EXPANDED for Originality AI
|
| 30 |
self.casual_transitions = [
|
| 31 |
"So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
|
|
@@ -158,8 +165,8 @@ class HumanLikeVariations:
|
|
| 158 |
# Always use contractions where natural
|
| 159 |
sent = self.apply_contractions(sent)
|
| 160 |
|
| 161 |
-
# Add VERY occasional natural errors (
|
| 162 |
-
if random.random() <
|
| 163 |
error_types = [
|
| 164 |
# Missing comma in compound sentence
|
| 165 |
lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
|
|
@@ -199,30 +206,30 @@ class HumanLikeVariations:
|
|
| 199 |
}
|
| 200 |
|
| 201 |
for full, contr in contractions.items():
|
| 202 |
-
if random.random() <
|
| 203 |
text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
|
| 204 |
|
| 205 |
return text
|
| 206 |
|
| 207 |
def add_minor_errors(self, text):
|
| 208 |
"""Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
|
| 209 |
-
# Occasionally miss Oxford comma (
|
| 210 |
-
if random.random() <
|
| 211 |
# Only in lists, not random commas
|
| 212 |
text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
|
| 213 |
|
| 214 |
-
# Sometimes use 'which' instead of 'that' (
|
| 215 |
-
if random.random() <
|
| 216 |
# Only for non-restrictive clauses
|
| 217 |
matches = re.finditer(r'\b(\w+) that (\w+)', text)
|
| 218 |
for match in list(matches)[:1]: # Only first occurrence
|
| 219 |
if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
|
| 220 |
text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
|
| 221 |
|
| 222 |
-
# NEW: Add very occasional typos (
|
| 223 |
sentences = text.split('. ')
|
| 224 |
for i, sent in enumerate(sentences):
|
| 225 |
-
if random.random() <
|
| 226 |
words = sent.split()
|
| 227 |
# Pick a random word to potentially typo
|
| 228 |
word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
|
|
@@ -267,7 +274,7 @@ class HumanLikeVariations:
|
|
| 267 |
|
| 268 |
return text
|
| 269 |
|
| 270 |
-
def add_natural_human_patterns(self, text):
|
| 271 |
"""Add natural human writing patterns that Originality AI associates with human text"""
|
| 272 |
sentences = self.split_into_sentences_advanced(text)
|
| 273 |
result_sentences = []
|
|
@@ -279,8 +286,8 @@ class HumanLikeVariations:
|
|
| 279 |
# Natural contractions throughout
|
| 280 |
sentence = self.apply_contractions(sentence)
|
| 281 |
|
| 282 |
-
# Add natural speech patterns (
|
| 283 |
-
if random.random() <
|
| 284 |
# Natural interruptions that humans actually use
|
| 285 |
if random.random() < 0.5:
|
| 286 |
# Add "you know" or "I mean" naturally
|
|
@@ -297,8 +304,8 @@ class HumanLikeVariations:
|
|
| 297 |
openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
|
| 298 |
sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
|
| 299 |
|
| 300 |
-
# Add subtle errors that humans make (
|
| 301 |
-
if random.random() <
|
| 302 |
words = sentence.split()
|
| 303 |
if len(words) > 5:
|
| 304 |
# Common comma omissions
|
|
@@ -311,8 +318,8 @@ class HumanLikeVariations:
|
|
| 311 |
words.insert(idx+1, words[idx])
|
| 312 |
sentence = ' '.join(words)
|
| 313 |
|
| 314 |
-
# Natural sentence combinations (
|
| 315 |
-
if i < len(sentences) - 1 and random.random() <
|
| 316 |
next_sent = sentences[i+1].strip()
|
| 317 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 318 |
# Natural connectors based on content
|
|
@@ -544,11 +551,8 @@ class EnhancedDipperHumanizer:
|
|
| 544 |
except:
|
| 545 |
print("BART model not available")
|
| 546 |
self.use_bart = False
|
| 547 |
-
|
| 548 |
-
# Initialize human variations handler
|
| 549 |
-
self.human_variations = HumanLikeVariations()
|
| 550 |
|
| 551 |
-
def add_natural_human_patterns(self, text):
|
| 552 |
"""Add natural human writing patterns that Originality AI associates with human text"""
|
| 553 |
sentences = self.split_into_sentences_advanced(text)
|
| 554 |
result_sentences = []
|
|
@@ -560,8 +564,8 @@ class EnhancedDipperHumanizer:
|
|
| 560 |
# Natural contractions throughout
|
| 561 |
sentence = self.apply_contractions(sentence)
|
| 562 |
|
| 563 |
-
# Add natural speech patterns (
|
| 564 |
-
if random.random() <
|
| 565 |
# Natural interruptions that humans actually use
|
| 566 |
if random.random() < 0.5:
|
| 567 |
# Add "you know" or "I mean" naturally
|
|
@@ -578,8 +582,8 @@ class EnhancedDipperHumanizer:
|
|
| 578 |
openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
|
| 579 |
sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
|
| 580 |
|
| 581 |
-
# Add subtle errors that humans make (
|
| 582 |
-
if random.random() <
|
| 583 |
words = sentence.split()
|
| 584 |
if len(words) > 5:
|
| 585 |
# Common comma omissions
|
|
@@ -592,8 +596,8 @@ class EnhancedDipperHumanizer:
|
|
| 592 |
words.insert(idx+1, words[idx])
|
| 593 |
sentence = ' '.join(words)
|
| 594 |
|
| 595 |
-
# Natural sentence combinations (
|
| 596 |
-
if i < len(sentences) - 1 and random.random() <
|
| 597 |
next_sent = sentences[i+1].strip()
|
| 598 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 599 |
# Natural connectors based on content
|
|
@@ -860,7 +864,9 @@ class EnhancedDipperHumanizer:
|
|
| 860 |
|
| 861 |
return text.strip()
|
| 862 |
|
| 863 |
-
def paraphrase_with_dipper(self, text, lex_diversity=
|
|
|
|
|
|
|
| 864 |
"""Paraphrase text using Dipper model with sentence-level processing"""
|
| 865 |
if not text or len(text.strip()) < 3:
|
| 866 |
return text
|
|
@@ -878,16 +884,17 @@ class EnhancedDipperHumanizer:
|
|
| 878 |
continue
|
| 879 |
|
| 880 |
try:
|
| 881 |
-
#
|
| 882 |
if len(sentence.split()) < 10:
|
| 883 |
-
|
| 884 |
-
|
|
|
|
| 885 |
else:
|
| 886 |
-
|
| 887 |
-
|
| 888 |
|
| 889 |
-
lex_code = int(100 -
|
| 890 |
-
order_code = int(100 -
|
| 891 |
|
| 892 |
# Format input for Dipper
|
| 893 |
if self.is_dipper:
|
|
@@ -913,11 +920,7 @@ class EnhancedDipperHumanizer:
|
|
| 913 |
|
| 914 |
# Generate with appropriate variation
|
| 915 |
original_length = len(sentence.split())
|
| 916 |
-
max_new_length = int(original_length *
|
| 917 |
-
|
| 918 |
-
# High variation parameters
|
| 919 |
-
temp = 0.8
|
| 920 |
-
top_p_val = 0.9
|
| 921 |
|
| 922 |
with torch.no_grad():
|
| 923 |
outputs = self.model.generate(
|
|
@@ -925,9 +928,9 @@ class EnhancedDipperHumanizer:
|
|
| 925 |
max_length=max_new_length + 20,
|
| 926 |
min_length=max(5, int(original_length * 0.7)),
|
| 927 |
do_sample=True,
|
| 928 |
-
top_p=
|
| 929 |
-
temperature=
|
| 930 |
-
no_repeat_ngram_size=
|
| 931 |
num_beams=1, # Greedy for more randomness
|
| 932 |
early_stopping=True
|
| 933 |
)
|
|
@@ -964,9 +967,6 @@ class EnhancedDipperHumanizer:
|
|
| 964 |
# Join sentences back
|
| 965 |
result = ' '.join(paraphrased_sentences)
|
| 966 |
|
| 967 |
-
# Apply natural human patterns
|
| 968 |
-
result = self.add_natural_human_patterns(result)
|
| 969 |
-
|
| 970 |
return result
|
| 971 |
|
| 972 |
def fix_incomplete_sentence_smart(self, generated, original):
|
|
@@ -1055,7 +1055,7 @@ class EnhancedDipperHumanizer:
|
|
| 1055 |
# Clean up sentences
|
| 1056 |
return [s for s in sentences if s and len(s.strip()) > 0]
|
| 1057 |
|
| 1058 |
-
def paraphrase_with_bart(self, text):
|
| 1059 |
"""Additional paraphrasing with BART for more variation"""
|
| 1060 |
if not self.use_bart or not text or len(text.strip()) < 3:
|
| 1061 |
return text
|
|
@@ -1091,10 +1091,10 @@ class EnhancedDipperHumanizer:
|
|
| 1091 |
**inputs,
|
| 1092 |
max_length=int(original_length * 1.4) + 10,
|
| 1093 |
min_length=max(5, int(original_length * 0.6)),
|
| 1094 |
-
num_beams=
|
| 1095 |
-
temperature=
|
| 1096 |
do_sample=True,
|
| 1097 |
-
top_p=
|
| 1098 |
early_stopping=True
|
| 1099 |
)
|
| 1100 |
|
|
@@ -1116,7 +1116,8 @@ class EnhancedDipperHumanizer:
|
|
| 1116 |
print(f"Error in BART paraphrasing: {str(e)}")
|
| 1117 |
return text
|
| 1118 |
|
| 1119 |
-
def apply_sentence_variation(self, text
|
|
|
|
| 1120 |
"""Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
|
| 1121 |
sentences = self.split_into_sentences_advanced(text)
|
| 1122 |
varied_sentences = []
|
|
@@ -1132,7 +1133,7 @@ class EnhancedDipperHumanizer:
|
|
| 1132 |
current_length = len(words)
|
| 1133 |
|
| 1134 |
# Natural sentence length variation
|
| 1135 |
-
if last_sentence_length >
|
| 1136 |
# Break up if two long sentences in a row
|
| 1137 |
if ',' in sentence:
|
| 1138 |
parts = sentence.split(',', 1)
|
|
@@ -1147,8 +1148,8 @@ class EnhancedDipperHumanizer:
|
|
| 1147 |
|
| 1148 |
# Natural combinations for flow
|
| 1149 |
if (i < len(sentences) - 1 and
|
| 1150 |
-
current_length <
|
| 1151 |
-
len(sentences[i+1].split()) <
|
| 1152 |
|
| 1153 |
next_sent = sentences[i+1].strip()
|
| 1154 |
# Only combine if it makes semantic sense
|
|
@@ -1364,7 +1365,8 @@ class EnhancedDipperHumanizer:
|
|
| 1364 |
|
| 1365 |
return html_text
|
| 1366 |
|
| 1367 |
-
def add_natural_flow_variations(self, text
|
|
|
|
| 1368 |
"""Add more natural flow and rhythm variations for Originality AI"""
|
| 1369 |
sentences = self.split_into_sentences_advanced(text)
|
| 1370 |
enhanced_sentences = []
|
|
@@ -1373,8 +1375,8 @@ class EnhancedDipperHumanizer:
|
|
| 1373 |
if not sentence.strip():
|
| 1374 |
continue
|
| 1375 |
|
| 1376 |
-
# Add stream-of-consciousness elements (
|
| 1377 |
-
if random.random() <
|
| 1378 |
stream_elements = [
|
| 1379 |
" - wait, let me back up - ",
|
| 1380 |
" - actually, scratch that - ",
|
|
@@ -1388,8 +1390,8 @@ class EnhancedDipperHumanizer:
|
|
| 1388 |
words.insert(pos, random.choice(stream_elements))
|
| 1389 |
sentence = ' '.join(words)
|
| 1390 |
|
| 1391 |
-
# Add human-like self-corrections (
|
| 1392 |
-
if random.random() <
|
| 1393 |
corrections = [
|
| 1394 |
" - or rather, ",
|
| 1395 |
" - well, actually, ",
|
|
@@ -1407,8 +1409,8 @@ class EnhancedDipperHumanizer:
|
|
| 1407 |
words.insert(pos, correction)
|
| 1408 |
sentence = ' '.join(words)
|
| 1409 |
|
| 1410 |
-
# Add thinking-out-loud patterns (
|
| 1411 |
-
if random.random() <
|
| 1412 |
thinking_patterns = [
|
| 1413 |
"Come to think of it, ",
|
| 1414 |
"Actually, you know what? ",
|
|
@@ -1426,11 +1428,45 @@ class EnhancedDipperHumanizer:
|
|
| 1426 |
|
| 1427 |
return ' '.join(enhanced_sentences)
|
| 1428 |
|
| 1429 |
-
def process_html(self, html_content, progress_callback=None):
|
| 1430 |
-
"""Main processing function with progress callback"""
|
| 1431 |
if not html_content.strip():
|
| 1432 |
return "Please provide HTML content."
|
| 1433 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1434 |
# Store all script and style content to preserve it
|
| 1435 |
script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
|
| 1436 |
style_placeholder = "###STYLE_PLACEHOLDER_{}###"
|
|
@@ -1476,24 +1512,49 @@ class EnhancedDipperHumanizer:
|
|
| 1476 |
if len(original_text.split()) < 3:
|
| 1477 |
continue
|
| 1478 |
|
| 1479 |
-
# First pass with Dipper
|
| 1480 |
paraphrased_text = self.paraphrase_with_dipper(
|
| 1481 |
original_text,
|
| 1482 |
-
lex_diversity=
|
| 1483 |
-
order_diversity=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1484 |
)
|
| 1485 |
|
| 1486 |
-
# Second pass with BART for longer texts (
|
| 1487 |
if self.use_bart and len(paraphrased_text.split()) > 8:
|
| 1488 |
-
|
| 1489 |
-
|
| 1490 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1491 |
|
| 1492 |
-
# Apply sentence variation
|
| 1493 |
-
paraphrased_text = self.apply_sentence_variation(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1494 |
|
| 1495 |
-
# Add natural flow variations
|
| 1496 |
-
paraphrased_text = self.add_natural_flow_variations(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1497 |
|
| 1498 |
# Fix punctuation and formatting
|
| 1499 |
paraphrased_text = self.fix_punctuation(paraphrased_text)
|
|
@@ -1658,8 +1719,24 @@ class EnhancedDipperHumanizer:
|
|
| 1658 |
# Initialize the humanizer
|
| 1659 |
humanizer = EnhancedDipperHumanizer()
|
| 1660 |
|
| 1661 |
-
def humanize_html(html_input,
|
| 1662 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1663 |
if not html_input:
|
| 1664 |
return "Please provide HTML content to humanize."
|
| 1665 |
|
|
@@ -1671,10 +1748,33 @@ def humanize_html(html_input, progress=gr.Progress()):
|
|
| 1671 |
if total > 0:
|
| 1672 |
progress(current / total, desc=f"Processing: {current}/{total} elements")
|
| 1673 |
|
| 1674 |
-
# Pass
|
| 1675 |
result = humanizer.process_html(
|
| 1676 |
html_input,
|
| 1677 |
-
progress_callback=progress_callback
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1678 |
)
|
| 1679 |
|
| 1680 |
processing_time = time.time() - start_time
|
|
@@ -1683,47 +1783,192 @@ def humanize_html(html_input, progress=gr.Progress()):
|
|
| 1683 |
|
| 1684 |
return result
|
| 1685 |
|
| 1686 |
-
# Create Gradio interface with
|
| 1687 |
-
|
| 1688 |
-
|
| 1689 |
-
|
| 1690 |
-
|
| 1691 |
-
|
| 1692 |
-
|
| 1693 |
-
|
| 1694 |
-
)
|
| 1695 |
-
],
|
| 1696 |
-
outputs=gr.Textbox(
|
| 1697 |
-
lines=10,
|
| 1698 |
-
label="Humanized HTML Output"
|
| 1699 |
-
),
|
| 1700 |
-
title="Enhanced Dipper AI Humanizer - Optimized for Originality AI",
|
| 1701 |
-
description="""
|
| 1702 |
-
Ultra-aggressive humanizer optimized to achieve 100% human scores on both Undetectable AI and Originality AI.
|
| 1703 |
|
| 1704 |
-
|
| 1705 |
-
|
| 1706 |
-
|
| 1707 |
-
|
| 1708 |
-
|
| 1709 |
-
|
| 1710 |
-
|
| 1711 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1712 |
|
| 1713 |
-
|
|
|
|
|
|
|
|
|
|
| 1714 |
|
| 1715 |
-
|
| 1716 |
-
|
| 1717 |
-
|
| 1718 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1719 |
<h1>The Benefits of Regular Exercise</h1>
|
| 1720 |
<div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
|
| 1721 |
<p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
|
| 1722 |
<p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
|
| 1723 |
-
</article>"""]
|
| 1724 |
-
|
| 1725 |
-
|
| 1726 |
-
)
|
| 1727 |
|
| 1728 |
if __name__ == "__main__":
|
| 1729 |
# Enable queue for better handling of long-running processes
|
|
|
|
| 25 |
class HumanLikeVariations:
|
| 26 |
"""Add human-like variations and intentional imperfections"""
|
| 27 |
|
| 28 |
+
def __init__(self, contraction_prob=0.8, oxford_comma_prob=0.15, which_that_prob=0.08,
|
| 29 |
+
typo_prob=0.02, natural_error_prob=0.05):
|
| 30 |
+
self.contraction_prob = contraction_prob
|
| 31 |
+
self.oxford_comma_prob = oxford_comma_prob
|
| 32 |
+
self.which_that_prob = which_that_prob
|
| 33 |
+
self.typo_prob = typo_prob
|
| 34 |
+
self.natural_error_prob = natural_error_prob
|
| 35 |
+
|
| 36 |
# Common human writing patterns - EXPANDED for Originality AI
|
| 37 |
self.casual_transitions = [
|
| 38 |
"So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
|
|
|
|
| 165 |
# Always use contractions where natural
|
| 166 |
sent = self.apply_contractions(sent)
|
| 167 |
|
| 168 |
+
# Add VERY occasional natural errors (based on parameter)
|
| 169 |
+
if random.random() < self.natural_error_prob and len(sent.split()) > 15:
|
| 170 |
error_types = [
|
| 171 |
# Missing comma in compound sentence
|
| 172 |
lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
|
|
|
|
| 206 |
}
|
| 207 |
|
| 208 |
for full, contr in contractions.items():
|
| 209 |
+
if random.random() < self.contraction_prob: # Use configurable probability
|
| 210 |
text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
|
| 211 |
|
| 212 |
return text
|
| 213 |
|
| 214 |
def add_minor_errors(self, text):
|
| 215 |
"""Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
|
| 216 |
+
# Occasionally miss Oxford comma (based on parameter)
|
| 217 |
+
if random.random() < self.oxford_comma_prob:
|
| 218 |
# Only in lists, not random commas
|
| 219 |
text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
|
| 220 |
|
| 221 |
+
# Sometimes use 'which' instead of 'that' (based on parameter)
|
| 222 |
+
if random.random() < self.which_that_prob:
|
| 223 |
# Only for non-restrictive clauses
|
| 224 |
matches = re.finditer(r'\b(\w+) that (\w+)', text)
|
| 225 |
for match in list(matches)[:1]: # Only first occurrence
|
| 226 |
if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
|
| 227 |
text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
|
| 228 |
|
| 229 |
+
# NEW: Add very occasional typos (based on parameter) - REDUCED AND CONTROLLED
|
| 230 |
sentences = text.split('. ')
|
| 231 |
for i, sent in enumerate(sentences):
|
| 232 |
+
if random.random() < self.typo_prob and len(sent.split()) > 15: # Only in longer sentences
|
| 233 |
words = sent.split()
|
| 234 |
# Pick a random word to potentially typo
|
| 235 |
word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
|
|
|
|
| 274 |
|
| 275 |
return text
|
| 276 |
|
| 277 |
+
def add_natural_human_patterns(self, text, speech_prob=0.15, error_prob=0.10, combine_prob=0.2):
|
| 278 |
"""Add natural human writing patterns that Originality AI associates with human text"""
|
| 279 |
sentences = self.split_into_sentences_advanced(text)
|
| 280 |
result_sentences = []
|
|
|
|
| 286 |
# Natural contractions throughout
|
| 287 |
sentence = self.apply_contractions(sentence)
|
| 288 |
|
| 289 |
+
# Add natural speech patterns (based on parameter)
|
| 290 |
+
if random.random() < speech_prob and len(sentence.split()) > 10:
|
| 291 |
# Natural interruptions that humans actually use
|
| 292 |
if random.random() < 0.5:
|
| 293 |
# Add "you know" or "I mean" naturally
|
|
|
|
| 304 |
openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
|
| 305 |
sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
|
| 306 |
|
| 307 |
+
# Add subtle errors that humans make (based on parameter)
|
| 308 |
+
if random.random() < error_prob:
|
| 309 |
words = sentence.split()
|
| 310 |
if len(words) > 5:
|
| 311 |
# Common comma omissions
|
|
|
|
| 318 |
words.insert(idx+1, words[idx])
|
| 319 |
sentence = ' '.join(words)
|
| 320 |
|
| 321 |
+
# Natural sentence combinations (based on parameter)
|
| 322 |
+
if i < len(sentences) - 1 and random.random() < combine_prob:
|
| 323 |
next_sent = sentences[i+1].strip()
|
| 324 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 325 |
# Natural connectors based on content
|
|
|
|
| 551 |
except:
|
| 552 |
print("BART model not available")
|
| 553 |
self.use_bart = False
|
|
|
|
|
|
|
|
|
|
| 554 |
|
| 555 |
+
def add_natural_human_patterns(self, text, speech_prob=0.15, error_prob=0.10, combine_prob=0.2):
|
| 556 |
"""Add natural human writing patterns that Originality AI associates with human text"""
|
| 557 |
sentences = self.split_into_sentences_advanced(text)
|
| 558 |
result_sentences = []
|
|
|
|
| 564 |
# Natural contractions throughout
|
| 565 |
sentence = self.apply_contractions(sentence)
|
| 566 |
|
| 567 |
+
# Add natural speech patterns (based on parameter)
|
| 568 |
+
if random.random() < speech_prob and len(sentence.split()) > 10:
|
| 569 |
# Natural interruptions that humans actually use
|
| 570 |
if random.random() < 0.5:
|
| 571 |
# Add "you know" or "I mean" naturally
|
|
|
|
| 582 |
openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
|
| 583 |
sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
|
| 584 |
|
| 585 |
+
# Add subtle errors that humans make (based on parameter)
|
| 586 |
+
if random.random() < error_prob:
|
| 587 |
words = sentence.split()
|
| 588 |
if len(words) > 5:
|
| 589 |
# Common comma omissions
|
|
|
|
| 596 |
words.insert(idx+1, words[idx])
|
| 597 |
sentence = ' '.join(words)
|
| 598 |
|
| 599 |
+
# Natural sentence combinations (based on parameter)
|
| 600 |
+
if i < len(sentences) - 1 and random.random() < combine_prob:
|
| 601 |
next_sent = sentences[i+1].strip()
|
| 602 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 603 |
# Natural connectors based on content
|
|
|
|
| 864 |
|
| 865 |
return text.strip()
|
| 866 |
|
| 867 |
+
def paraphrase_with_dipper(self, text, lex_diversity=75, order_diversity=30,
|
| 868 |
+
temperature=0.85, top_p=0.92, length_multiplier=1.4,
|
| 869 |
+
no_repeat_ngram=4):
|
| 870 |
"""Paraphrase text using Dipper model with sentence-level processing"""
|
| 871 |
if not text or len(text.strip()) < 3:
|
| 872 |
return text
|
|
|
|
| 884 |
continue
|
| 885 |
|
| 886 |
try:
|
| 887 |
+
# Apply diversity settings based on sentence length
|
| 888 |
if len(sentence.split()) < 10:
|
| 889 |
+
# Use slightly lower diversity for short sentences
|
| 890 |
+
actual_lex = max(lex_diversity - 5, 50)
|
| 891 |
+
actual_order = max(order_diversity - 5, 15)
|
| 892 |
else:
|
| 893 |
+
actual_lex = lex_diversity
|
| 894 |
+
actual_order = order_diversity
|
| 895 |
|
| 896 |
+
lex_code = int(100 - actual_lex)
|
| 897 |
+
order_code = int(100 - actual_order)
|
| 898 |
|
| 899 |
# Format input for Dipper
|
| 900 |
if self.is_dipper:
|
|
|
|
| 920 |
|
| 921 |
# Generate with appropriate variation
|
| 922 |
original_length = len(sentence.split())
|
| 923 |
+
max_new_length = int(original_length * length_multiplier)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 924 |
|
| 925 |
with torch.no_grad():
|
| 926 |
outputs = self.model.generate(
|
|
|
|
| 928 |
max_length=max_new_length + 20,
|
| 929 |
min_length=max(5, int(original_length * 0.7)),
|
| 930 |
do_sample=True,
|
| 931 |
+
top_p=top_p,
|
| 932 |
+
temperature=temperature,
|
| 933 |
+
no_repeat_ngram_size=no_repeat_ngram,
|
| 934 |
num_beams=1, # Greedy for more randomness
|
| 935 |
early_stopping=True
|
| 936 |
)
|
|
|
|
| 967 |
# Join sentences back
|
| 968 |
result = ' '.join(paraphrased_sentences)
|
| 969 |
|
|
|
|
|
|
|
|
|
|
| 970 |
return result
|
| 971 |
|
| 972 |
def fix_incomplete_sentence_smart(self, generated, original):
|
|
|
|
| 1055 |
# Clean up sentences
|
| 1056 |
return [s for s in sentences if s and len(s.strip()) > 0]
|
| 1057 |
|
| 1058 |
+
def paraphrase_with_bart(self, text, bart_temperature=1.1, bart_top_p=0.9, bart_beams=2):
|
| 1059 |
"""Additional paraphrasing with BART for more variation"""
|
| 1060 |
if not self.use_bart or not text or len(text.strip()) < 3:
|
| 1061 |
return text
|
|
|
|
| 1091 |
**inputs,
|
| 1092 |
max_length=int(original_length * 1.4) + 10,
|
| 1093 |
min_length=max(5, int(original_length * 0.6)),
|
| 1094 |
+
num_beams=bart_beams,
|
| 1095 |
+
temperature=bart_temperature,
|
| 1096 |
do_sample=True,
|
| 1097 |
+
top_p=bart_top_p,
|
| 1098 |
early_stopping=True
|
| 1099 |
)
|
| 1100 |
|
|
|
|
| 1116 |
print(f"Error in BART paraphrasing: {str(e)}")
|
| 1117 |
return text
|
| 1118 |
|
| 1119 |
+
def apply_sentence_variation(self, text, long_sentence_threshold=20,
|
| 1120 |
+
short_sentence_threshold=10):
|
| 1121 |
"""Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
|
| 1122 |
sentences = self.split_into_sentences_advanced(text)
|
| 1123 |
varied_sentences = []
|
|
|
|
| 1133 |
current_length = len(words)
|
| 1134 |
|
| 1135 |
# Natural sentence length variation
|
| 1136 |
+
if last_sentence_length > long_sentence_threshold and current_length > long_sentence_threshold:
|
| 1137 |
# Break up if two long sentences in a row
|
| 1138 |
if ',' in sentence:
|
| 1139 |
parts = sentence.split(',', 1)
|
|
|
|
| 1148 |
|
| 1149 |
# Natural combinations for flow
|
| 1150 |
if (i < len(sentences) - 1 and
|
| 1151 |
+
current_length < short_sentence_threshold and
|
| 1152 |
+
len(sentences[i+1].split()) < short_sentence_threshold):
|
| 1153 |
|
| 1154 |
next_sent = sentences[i+1].strip()
|
| 1155 |
# Only combine if it makes semantic sense
|
|
|
|
| 1365 |
|
| 1366 |
return html_text
|
| 1367 |
|
| 1368 |
+
def add_natural_flow_variations(self, text, stream_prob=0.08, correction_prob=0.07,
|
| 1369 |
+
thinking_prob=0.10):
|
| 1370 |
"""Add more natural flow and rhythm variations for Originality AI"""
|
| 1371 |
sentences = self.split_into_sentences_advanced(text)
|
| 1372 |
enhanced_sentences = []
|
|
|
|
| 1375 |
if not sentence.strip():
|
| 1376 |
continue
|
| 1377 |
|
| 1378 |
+
# Add stream-of-consciousness elements (based on parameter)
|
| 1379 |
+
if random.random() < stream_prob and len(sentence.split()) > 10:
|
| 1380 |
stream_elements = [
|
| 1381 |
" - wait, let me back up - ",
|
| 1382 |
" - actually, scratch that - ",
|
|
|
|
| 1390 |
words.insert(pos, random.choice(stream_elements))
|
| 1391 |
sentence = ' '.join(words)
|
| 1392 |
|
| 1393 |
+
# Add human-like self-corrections (based on parameter)
|
| 1394 |
+
if random.random() < correction_prob:
|
| 1395 |
corrections = [
|
| 1396 |
" - or rather, ",
|
| 1397 |
" - well, actually, ",
|
|
|
|
| 1409 |
words.insert(pos, correction)
|
| 1410 |
sentence = ' '.join(words)
|
| 1411 |
|
| 1412 |
+
# Add thinking-out-loud patterns (based on parameter)
|
| 1413 |
+
if random.random() < thinking_prob and i > 0:
|
| 1414 |
thinking_patterns = [
|
| 1415 |
"Come to think of it, ",
|
| 1416 |
"Actually, you know what? ",
|
|
|
|
| 1428 |
|
| 1429 |
return ' '.join(enhanced_sentences)
|
| 1430 |
|
| 1431 |
+
def process_html(self, html_content, progress_callback=None, **kwargs):
|
| 1432 |
+
"""Main processing function with progress callback and configurable parameters"""
|
| 1433 |
if not html_content.strip():
|
| 1434 |
return "Please provide HTML content."
|
| 1435 |
|
| 1436 |
+
# Extract all parameters with defaults
|
| 1437 |
+
lex_diversity = kwargs.get('lex_diversity', 75)
|
| 1438 |
+
order_diversity = kwargs.get('order_diversity', 30)
|
| 1439 |
+
temperature = kwargs.get('temperature', 0.85)
|
| 1440 |
+
top_p = kwargs.get('top_p', 0.92)
|
| 1441 |
+
length_multiplier = kwargs.get('length_multiplier', 1.4)
|
| 1442 |
+
no_repeat_ngram = kwargs.get('no_repeat_ngram', 4)
|
| 1443 |
+
bart_usage_prob = kwargs.get('bart_usage_prob', 0.3)
|
| 1444 |
+
bart_temperature = kwargs.get('bart_temperature', 1.1)
|
| 1445 |
+
bart_top_p = kwargs.get('bart_top_p', 0.9)
|
| 1446 |
+
bart_beams = kwargs.get('bart_beams', 2)
|
| 1447 |
+
contraction_prob = kwargs.get('contraction_prob', 0.8)
|
| 1448 |
+
oxford_comma_prob = kwargs.get('oxford_comma_prob', 0.15)
|
| 1449 |
+
which_that_prob = kwargs.get('which_that_prob', 0.08)
|
| 1450 |
+
typo_prob = kwargs.get('typo_prob', 0.02)
|
| 1451 |
+
natural_error_prob = kwargs.get('natural_error_prob', 0.05)
|
| 1452 |
+
speech_pattern_prob = kwargs.get('speech_pattern_prob', 0.15)
|
| 1453 |
+
subtle_error_prob = kwargs.get('subtle_error_prob', 0.10)
|
| 1454 |
+
sentence_combine_prob = kwargs.get('sentence_combine_prob', 0.2)
|
| 1455 |
+
stream_conscious_prob = kwargs.get('stream_conscious_prob', 0.08)
|
| 1456 |
+
self_correction_prob = kwargs.get('self_correction_prob', 0.07)
|
| 1457 |
+
thinking_loud_prob = kwargs.get('thinking_loud_prob', 0.10)
|
| 1458 |
+
long_sentence_threshold = kwargs.get('long_sentence_threshold', 20)
|
| 1459 |
+
short_sentence_threshold = kwargs.get('short_sentence_threshold', 10)
|
| 1460 |
+
|
| 1461 |
+
# Initialize human variations with parameters
|
| 1462 |
+
self.human_variations = HumanLikeVariations(
|
| 1463 |
+
contraction_prob=contraction_prob,
|
| 1464 |
+
oxford_comma_prob=oxford_comma_prob,
|
| 1465 |
+
which_that_prob=which_that_prob,
|
| 1466 |
+
typo_prob=typo_prob,
|
| 1467 |
+
natural_error_prob=natural_error_prob
|
| 1468 |
+
)
|
| 1469 |
+
|
| 1470 |
# Store all script and style content to preserve it
|
| 1471 |
script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
|
| 1472 |
style_placeholder = "###STYLE_PLACEHOLDER_{}###"
|
|
|
|
| 1512 |
if len(original_text.split()) < 3:
|
| 1513 |
continue
|
| 1514 |
|
| 1515 |
+
# First pass with Dipper using configured parameters
|
| 1516 |
paraphrased_text = self.paraphrase_with_dipper(
|
| 1517 |
original_text,
|
| 1518 |
+
lex_diversity=lex_diversity,
|
| 1519 |
+
order_diversity=order_diversity,
|
| 1520 |
+
temperature=temperature,
|
| 1521 |
+
top_p=top_p,
|
| 1522 |
+
length_multiplier=length_multiplier,
|
| 1523 |
+
no_repeat_ngram=no_repeat_ngram
|
| 1524 |
+
)
|
| 1525 |
+
|
| 1526 |
+
# Add natural human patterns with configured probabilities
|
| 1527 |
+
paraphrased_text = self.add_natural_human_patterns(
|
| 1528 |
+
paraphrased_text,
|
| 1529 |
+
speech_prob=speech_pattern_prob,
|
| 1530 |
+
error_prob=subtle_error_prob,
|
| 1531 |
+
combine_prob=sentence_combine_prob
|
| 1532 |
)
|
| 1533 |
|
| 1534 |
+
# Second pass with BART for longer texts (based on configured probability)
|
| 1535 |
if self.use_bart and len(paraphrased_text.split()) > 8:
|
| 1536 |
+
if random.random() < bart_usage_prob:
|
| 1537 |
+
paraphrased_text = self.paraphrase_with_bart(
|
| 1538 |
+
paraphrased_text,
|
| 1539 |
+
bart_temperature=bart_temperature,
|
| 1540 |
+
bart_top_p=bart_top_p,
|
| 1541 |
+
bart_beams=bart_beams
|
| 1542 |
+
)
|
| 1543 |
|
| 1544 |
+
# Apply sentence variation with configured thresholds
|
| 1545 |
+
paraphrased_text = self.apply_sentence_variation(
|
| 1546 |
+
paraphrased_text,
|
| 1547 |
+
long_sentence_threshold=long_sentence_threshold,
|
| 1548 |
+
short_sentence_threshold=short_sentence_threshold
|
| 1549 |
+
)
|
| 1550 |
|
| 1551 |
+
# Add natural flow variations with configured probabilities
|
| 1552 |
+
paraphrased_text = self.add_natural_flow_variations(
|
| 1553 |
+
paraphrased_text,
|
| 1554 |
+
stream_prob=stream_conscious_prob,
|
| 1555 |
+
correction_prob=self_correction_prob,
|
| 1556 |
+
thinking_prob=thinking_loud_prob
|
| 1557 |
+
)
|
| 1558 |
|
| 1559 |
# Fix punctuation and formatting
|
| 1560 |
paraphrased_text = self.fix_punctuation(paraphrased_text)
|
|
|
|
| 1719 |
# Initialize the humanizer
|
| 1720 |
humanizer = EnhancedDipperHumanizer()
|
| 1721 |
|
| 1722 |
+
def humanize_html(html_input,
|
| 1723 |
+
# Diversity Settings
|
| 1724 |
+
lex_diversity=75, order_diversity=30,
|
| 1725 |
+
# Generation Parameters
|
| 1726 |
+
temperature=0.85, top_p=0.92, length_multiplier=1.4, no_repeat_ngram=4,
|
| 1727 |
+
# BART Parameters
|
| 1728 |
+
bart_usage_prob=0.3, bart_temperature=1.1, bart_top_p=0.9, bart_beams=2,
|
| 1729 |
+
# Human Variation Parameters
|
| 1730 |
+
contraction_prob=0.8, oxford_comma_prob=0.15, which_that_prob=0.08,
|
| 1731 |
+
typo_prob=0.02, natural_error_prob=0.05,
|
| 1732 |
+
# Human Pattern Frequencies
|
| 1733 |
+
speech_pattern_prob=0.15, subtle_error_prob=0.10, sentence_combine_prob=0.2,
|
| 1734 |
+
# Flow Variation Parameters
|
| 1735 |
+
stream_conscious_prob=0.08, self_correction_prob=0.07, thinking_loud_prob=0.10,
|
| 1736 |
+
# Sentence Variation Parameters
|
| 1737 |
+
long_sentence_threshold=20, short_sentence_threshold=10,
|
| 1738 |
+
progress=gr.Progress()):
|
| 1739 |
+
"""Gradio interface function with progress updates and all parameters"""
|
| 1740 |
if not html_input:
|
| 1741 |
return "Please provide HTML content to humanize."
|
| 1742 |
|
|
|
|
| 1748 |
if total > 0:
|
| 1749 |
progress(current / total, desc=f"Processing: {current}/{total} elements")
|
| 1750 |
|
| 1751 |
+
# Pass all parameters to process_html
|
| 1752 |
result = humanizer.process_html(
|
| 1753 |
html_input,
|
| 1754 |
+
progress_callback=progress_callback,
|
| 1755 |
+
lex_diversity=lex_diversity,
|
| 1756 |
+
order_diversity=order_diversity,
|
| 1757 |
+
temperature=temperature,
|
| 1758 |
+
top_p=top_p,
|
| 1759 |
+
length_multiplier=length_multiplier,
|
| 1760 |
+
no_repeat_ngram=no_repeat_ngram,
|
| 1761 |
+
bart_usage_prob=bart_usage_prob,
|
| 1762 |
+
bart_temperature=bart_temperature,
|
| 1763 |
+
bart_top_p=bart_top_p,
|
| 1764 |
+
bart_beams=bart_beams,
|
| 1765 |
+
contraction_prob=contraction_prob,
|
| 1766 |
+
oxford_comma_prob=oxford_comma_prob,
|
| 1767 |
+
which_that_prob=which_that_prob,
|
| 1768 |
+
typo_prob=typo_prob,
|
| 1769 |
+
natural_error_prob=natural_error_prob,
|
| 1770 |
+
speech_pattern_prob=speech_pattern_prob,
|
| 1771 |
+
subtle_error_prob=subtle_error_prob,
|
| 1772 |
+
sentence_combine_prob=sentence_combine_prob,
|
| 1773 |
+
stream_conscious_prob=stream_conscious_prob,
|
| 1774 |
+
self_correction_prob=self_correction_prob,
|
| 1775 |
+
thinking_loud_prob=thinking_loud_prob,
|
| 1776 |
+
long_sentence_threshold=long_sentence_threshold,
|
| 1777 |
+
short_sentence_threshold=short_sentence_threshold
|
| 1778 |
)
|
| 1779 |
|
| 1780 |
processing_time = time.time() - start_time
|
|
|
|
| 1783 |
|
| 1784 |
return result
|
| 1785 |
|
| 1786 |
+
# Create Gradio interface with all parameter inputs.
# NOTE: the parameter ordering used here is positional — the preset value
# lists, `all_params`, and the non-HTML parameters of `humanize_html` must
# all stay in the same order.
with gr.Blocks(title="Enhanced Dipper AI Humanizer - Fully Configurable") as iface:
    gr.Markdown("""
    # Enhanced Dipper AI Humanizer - Optimized for Originality AI

    Ultra-configurable humanizer with fine-grained control over all parameters.
    Adjust settings to find the perfect balance between human score and content quality.
    """)

    # --- Input / output panes ------------------------------------------------
    with gr.Row():
        with gr.Column(scale=1):
            html_input = gr.Textbox(
                lines=10,
                placeholder="Paste your HTML content here...",
                label="HTML Input"
            )

            process_btn = gr.Button("Process HTML", variant="primary")

        with gr.Column(scale=1):
            html_output = gr.Textbox(
                lines=10,
                label="Humanized HTML Output"
            )

    # --- Parameter tabs ------------------------------------------------------
    with gr.Tabs():
        with gr.Tab("Diversity Settings"):
            gr.Markdown("**Controls how much the text is varied from the original**")
            lex_diversity = gr.Slider(0, 100, value=75, step=5,
                                      label="Lexical Diversity",
                                      info="Higher = more word variation (75 balanced, 90+ for max human score)")
            order_diversity = gr.Slider(0, 100, value=30, step=5,
                                        label="Order Diversity",
                                        info="Higher = more word reordering (30 balanced, 40+ for max human score)")

        with gr.Tab("Generation Parameters"):
            gr.Markdown("**Fine-tune the AI model's text generation behavior**")
            temperature = gr.Slider(0.1, 2.0, value=0.85, step=0.05,
                                    label="Temperature",
                                    info="Higher = more randomness (0.85 balanced, 0.9+ for max human)")
            top_p = gr.Slider(0.1, 1.0, value=0.92, step=0.02,
                              label="Top-p (nucleus sampling)",
                              info="Higher = wider token selection (0.92 balanced, 0.95 for max human)")
            length_multiplier = gr.Slider(1.1, 2.0, value=1.4, step=0.1,
                                          label="Length Multiplier",
                                          info="How much longer/shorter output can be vs input")
            no_repeat_ngram = gr.Slider(2, 6, value=4, step=1,
                                        label="No Repeat N-gram Size",
                                        info="Prevents repetition of N-word phrases (4 is balanced)")

        with gr.Tab("BART Parameters"):
            gr.Markdown("**Settings for secondary BART paraphrasing model**")
            bart_usage_prob = gr.Slider(0.0, 1.0, value=0.3, step=0.05,
                                        label="BART Usage Probability",
                                        info="Chance to use BART for additional variation")
            bart_temperature = gr.Slider(0.7, 1.5, value=1.1, step=0.05,
                                         label="BART Temperature",
                                         info="Temperature for BART model")
            bart_top_p = gr.Slider(0.8, 1.0, value=0.9, step=0.02,
                                   label="BART Top-p",
                                   info="Top-p for BART model")
            bart_beams = gr.Slider(1, 4, value=2, step=1,
                                   label="BART Beam Size",
                                   info="Number of beams for BART generation")

        with gr.Tab("Human Variations"):
            gr.Markdown("**Control natural human-like writing patterns**")
            contraction_prob = gr.Slider(0.0, 1.0, value=0.8, step=0.05,
                                         label="Contraction Probability",
                                         info="Chance to use contractions (it's vs it is)")
            oxford_comma_prob = gr.Slider(0.0, 0.5, value=0.15, step=0.05,
                                          label="Oxford Comma Skip Probability",
                                          info="Chance to skip Oxford comma (human-like error)")
            which_that_prob = gr.Slider(0.0, 0.3, value=0.08, step=0.02,
                                        label="Which/That Substitution",
                                        info="Chance to use 'which' instead of 'that'")
            typo_prob = gr.Slider(0.0, 0.1, value=0.02, step=0.01,
                                  label="Typo Probability",
                                  info="Chance of natural typos per sentence")
            natural_error_prob = gr.Slider(0.0, 0.2, value=0.05, step=0.01,
                                           label="Natural Error Probability",
                                           info="Chance of human-like errors (missing commas, etc)")

        with gr.Tab("Human Pattern Frequencies"):
            gr.Markdown("**Frequency of conversational elements**")
            speech_pattern_prob = gr.Slider(0.0, 0.5, value=0.15, step=0.05,
                                            label="Speech Pattern Probability",
                                            info="Chance to add 'you know', 'I mean', etc.")
            subtle_error_prob = gr.Slider(0.0, 0.3, value=0.10, step=0.05,
                                          label="Subtle Error Probability",
                                          info="Chance of subtle human errors")
            sentence_combine_prob = gr.Slider(0.0, 0.5, value=0.2, step=0.05,
                                              label="Sentence Combination Probability",
                                              info="Chance to naturally combine short sentences")

        with gr.Tab("Flow Variations"):
            gr.Markdown("**Advanced human-like flow patterns**")
            stream_conscious_prob = gr.Slider(0.0, 0.3, value=0.08, step=0.02,
                                              label="Stream of Consciousness",
                                              info="Chance to add thinking interruptions")
            self_correction_prob = gr.Slider(0.0, 0.2, value=0.07, step=0.02,
                                             label="Self-Correction Probability",
                                             info="Chance to add 'or rather', 'I mean' corrections")
            thinking_loud_prob = gr.Slider(0.0, 0.3, value=0.10, step=0.02,
                                           label="Thinking Out Loud",
                                           info="Chance to add 'Come to think of it' patterns")

        with gr.Tab("Sentence Structure"):
            gr.Markdown("**Control sentence length variation**")
            # FIX: "Words count" -> "Word count" in the two user-facing info
            # strings below (grammar error in the original UI text).
            long_sentence_threshold = gr.Slider(10, 40, value=20, step=2,
                                                label="Long Sentence Threshold",
                                                info="Word count to consider sentence 'long'")
            short_sentence_threshold = gr.Slider(5, 15, value=10, step=1,
                                                 label="Short Sentence Threshold",
                                                 info="Word count to consider sentence 'short'")

    # --- Preset configurations -----------------------------------------------
    with gr.Accordion("Preset Configurations", open=False):
        gr.Markdown("""
        ### Quick Presets:
        - **Balanced (Default)**: Current settings - good quality with high human score
        - **Maximum Human**: Increase all diversity and variation parameters
        - **Quality Focus**: Decrease variation parameters for cleaner output
        - **Natural Flow**: Increase flow variations and speech patterns
        """)

        preset_buttons = gr.Row()
        with preset_buttons:
            balanced_btn = gr.Button("Load Balanced", scale=1)
            max_human_btn = gr.Button("Load Max Human", scale=1)
            quality_btn = gr.Button("Load Quality Focus", scale=1)
            natural_btn = gr.Button("Load Natural Flow", scale=1)

    # Preset value tables, consolidated from four copy-pasted functions.
    # Each list is positional and must line up with `all_params` below:
    #   [lex_div, order_div, temperature, top_p, length_mult, no_repeat_ngram,
    #    bart_usage, bart_temp, bart_top_p, bart_beams,
    #    contraction, oxford_comma, which_that, typo, natural_error,
    #    speech_pattern, subtle_error, sentence_combine,
    #    stream_conscious, self_correction, thinking_loud,
    #    long_threshold, short_threshold]
    _PRESETS = {
        "balanced": [75, 30, 0.85, 0.92, 1.4, 4, 0.3, 1.1, 0.9, 2,
                     0.8, 0.15, 0.08, 0.02, 0.05, 0.15, 0.10, 0.2,
                     0.08, 0.07, 0.10, 20, 10],
        "max_human": [90, 40, 0.95, 0.95, 1.5, 4, 0.4, 1.2, 0.95, 2,
                      0.9, 0.20, 0.12, 0.04, 0.08, 0.25, 0.15, 0.3,
                      0.15, 0.10, 0.15, 20, 10],
        "quality": [65, 20, 0.75, 0.88, 1.3, 4, 0.2, 1.0, 0.85, 3,
                    0.7, 0.10, 0.05, 0.01, 0.03, 0.08, 0.05, 0.15,
                    0.03, 0.03, 0.05, 25, 8],
        "natural": [70, 25, 0.82, 0.90, 1.4, 4, 0.35, 1.1, 0.9, 2,
                    0.85, 0.12, 0.06, 0.02, 0.04, 0.20, 0.12, 0.25,
                    0.12, 0.10, 0.15, 18, 12],
    }

    def load_balanced():
        """Return the default, balanced configuration values."""
        return _PRESETS["balanced"]

    def load_max_human():
        """Return settings tuned for the highest 'human' detector score."""
        return _PRESETS["max_human"]

    def load_quality():
        """Return settings tuned for the cleanest output quality."""
        return _PRESETS["quality"]

    def load_natural():
        """Return settings tuned for conversational, natural flow."""
        return _PRESETS["natural"]

    # All parameter components, in the same order as the preset lists and as
    # the non-HTML parameters of `humanize_html`.
    all_params = [
        lex_diversity, order_diversity, temperature, top_p, length_multiplier, no_repeat_ngram,
        bart_usage_prob, bart_temperature, bart_top_p, bart_beams,
        contraction_prob, oxford_comma_prob, which_that_prob, typo_prob, natural_error_prob,
        speech_pattern_prob, subtle_error_prob, sentence_combine_prob,
        stream_conscious_prob, self_correction_prob, thinking_loud_prob,
        long_sentence_threshold, short_sentence_threshold
    ]

    # Connect preset buttons
    balanced_btn.click(load_balanced, outputs=all_params)
    max_human_btn.click(load_max_human, outputs=all_params)
    quality_btn.click(load_quality, outputs=all_params)
    natural_btn.click(load_natural, outputs=all_params)

    # Connect main process button
    process_btn.click(
        humanize_html,
        inputs=[html_input] + all_params,
        outputs=html_output
    )

    # Add example
    gr.Examples(
        examples=[["""<article>
<h1>The Benefits of Regular Exercise</h1>
<div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
<p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
<p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
</article>"""]],
        inputs=html_input
    )
|
|
|
|
| 1972 |
|
| 1973 |
if __name__ == "__main__":
|
| 1974 |
# Enable queue for better handling of long-running processes
|