Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,14 +25,7 @@ except:
|
|
| 25 |
class HumanLikeVariations:
|
| 26 |
"""Add human-like variations and intentional imperfections"""
|
| 27 |
|
| 28 |
-
def __init__(self
|
| 29 |
-
typo_prob=0.02, natural_error_prob=0.05):
|
| 30 |
-
self.contraction_prob = contraction_prob
|
| 31 |
-
self.oxford_comma_prob = oxford_comma_prob
|
| 32 |
-
self.which_that_prob = which_that_prob
|
| 33 |
-
self.typo_prob = typo_prob
|
| 34 |
-
self.natural_error_prob = natural_error_prob
|
| 35 |
-
|
| 36 |
# Common human writing patterns - EXPANDED for Originality AI
|
| 37 |
self.casual_transitions = [
|
| 38 |
"So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
|
|
@@ -165,8 +158,8 @@ class HumanLikeVariations:
|
|
| 165 |
# Always use contractions where natural
|
| 166 |
sent = self.apply_contractions(sent)
|
| 167 |
|
| 168 |
-
# Add VERY occasional natural errors (
|
| 169 |
-
if random.random() <
|
| 170 |
error_types = [
|
| 171 |
# Missing comma in compound sentence
|
| 172 |
lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
|
|
@@ -206,30 +199,30 @@ class HumanLikeVariations:
|
|
| 206 |
}
|
| 207 |
|
| 208 |
for full, contr in contractions.items():
|
| 209 |
-
if random.random() <
|
| 210 |
text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
|
| 211 |
|
| 212 |
return text
|
| 213 |
|
| 214 |
def add_minor_errors(self, text):
|
| 215 |
"""Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
|
| 216 |
-
# Occasionally miss Oxford comma (
|
| 217 |
-
if random.random() <
|
| 218 |
# Only in lists, not random commas
|
| 219 |
text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
|
| 220 |
|
| 221 |
-
# Sometimes use 'which' instead of 'that' (
|
| 222 |
-
if random.random() <
|
| 223 |
# Only for non-restrictive clauses
|
| 224 |
matches = re.finditer(r'\b(\w+) that (\w+)', text)
|
| 225 |
for match in list(matches)[:1]: # Only first occurrence
|
| 226 |
if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
|
| 227 |
text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
|
| 228 |
|
| 229 |
-
# NEW: Add very occasional typos (
|
| 230 |
sentences = text.split('. ')
|
| 231 |
for i, sent in enumerate(sentences):
|
| 232 |
-
if random.random() <
|
| 233 |
words = sent.split()
|
| 234 |
# Pick a random word to potentially typo
|
| 235 |
word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
|
|
@@ -274,7 +267,7 @@ class HumanLikeVariations:
|
|
| 274 |
|
| 275 |
return text
|
| 276 |
|
| 277 |
-
def add_natural_human_patterns(self, text
|
| 278 |
"""Add natural human writing patterns that Originality AI associates with human text"""
|
| 279 |
sentences = self.split_into_sentences_advanced(text)
|
| 280 |
result_sentences = []
|
|
@@ -286,8 +279,8 @@ class HumanLikeVariations:
|
|
| 286 |
# Natural contractions throughout
|
| 287 |
sentence = self.apply_contractions(sentence)
|
| 288 |
|
| 289 |
-
# Add natural speech patterns (
|
| 290 |
-
if random.random() <
|
| 291 |
# Natural interruptions that humans actually use
|
| 292 |
if random.random() < 0.5:
|
| 293 |
# Add "you know" or "I mean" naturally
|
|
@@ -304,8 +297,8 @@ class HumanLikeVariations:
|
|
| 304 |
openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
|
| 305 |
sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
|
| 306 |
|
| 307 |
-
# Add subtle errors that humans make (
|
| 308 |
-
if random.random() <
|
| 309 |
words = sentence.split()
|
| 310 |
if len(words) > 5:
|
| 311 |
# Common comma omissions
|
|
@@ -318,8 +311,8 @@ class HumanLikeVariations:
|
|
| 318 |
words.insert(idx+1, words[idx])
|
| 319 |
sentence = ' '.join(words)
|
| 320 |
|
| 321 |
-
# Natural sentence combinations (
|
| 322 |
-
if i < len(sentences) - 1 and random.random() <
|
| 323 |
next_sent = sentences[i+1].strip()
|
| 324 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 325 |
# Natural connectors based on content
|
|
@@ -551,8 +544,11 @@ class EnhancedDipperHumanizer:
|
|
| 551 |
except:
|
| 552 |
print("BART model not available")
|
| 553 |
self.use_bart = False
|
|
|
|
|
|
|
|
|
|
| 554 |
|
| 555 |
-
def add_natural_human_patterns(self, text
|
| 556 |
"""Add natural human writing patterns that Originality AI associates with human text"""
|
| 557 |
sentences = self.split_into_sentences_advanced(text)
|
| 558 |
result_sentences = []
|
|
@@ -564,8 +560,8 @@ class EnhancedDipperHumanizer:
|
|
| 564 |
# Natural contractions throughout
|
| 565 |
sentence = self.apply_contractions(sentence)
|
| 566 |
|
| 567 |
-
# Add natural speech patterns (
|
| 568 |
-
if random.random() <
|
| 569 |
# Natural interruptions that humans actually use
|
| 570 |
if random.random() < 0.5:
|
| 571 |
# Add "you know" or "I mean" naturally
|
|
@@ -582,8 +578,8 @@ class EnhancedDipperHumanizer:
|
|
| 582 |
openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
|
| 583 |
sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
|
| 584 |
|
| 585 |
-
# Add subtle errors that humans make (
|
| 586 |
-
if random.random() <
|
| 587 |
words = sentence.split()
|
| 588 |
if len(words) > 5:
|
| 589 |
# Common comma omissions
|
|
@@ -596,8 +592,8 @@ class EnhancedDipperHumanizer:
|
|
| 596 |
words.insert(idx+1, words[idx])
|
| 597 |
sentence = ' '.join(words)
|
| 598 |
|
| 599 |
-
# Natural sentence combinations (
|
| 600 |
-
if i < len(sentences) - 1 and random.random() <
|
| 601 |
next_sent = sentences[i+1].strip()
|
| 602 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 603 |
# Natural connectors based on content
|
|
@@ -864,9 +860,7 @@ class EnhancedDipperHumanizer:
|
|
| 864 |
|
| 865 |
return text.strip()
|
| 866 |
|
| 867 |
-
def paraphrase_with_dipper(self, text, lex_diversity=
|
| 868 |
-
temperature=0.85, top_p=0.92, length_multiplier=1.4,
|
| 869 |
-
no_repeat_ngram=4):
|
| 870 |
"""Paraphrase text using Dipper model with sentence-level processing"""
|
| 871 |
if not text or len(text.strip()) < 3:
|
| 872 |
return text
|
|
@@ -884,17 +878,16 @@ class EnhancedDipperHumanizer:
|
|
| 884 |
continue
|
| 885 |
|
| 886 |
try:
|
| 887 |
-
#
|
| 888 |
if len(sentence.split()) < 10:
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
actual_order = max(order_diversity - 5, 15)
|
| 892 |
else:
|
| 893 |
-
|
| 894 |
-
|
| 895 |
|
| 896 |
-
lex_code = int(100 -
|
| 897 |
-
order_code = int(100 -
|
| 898 |
|
| 899 |
# Format input for Dipper
|
| 900 |
if self.is_dipper:
|
|
@@ -920,7 +913,11 @@ class EnhancedDipperHumanizer:
|
|
| 920 |
|
| 921 |
# Generate with appropriate variation
|
| 922 |
original_length = len(sentence.split())
|
| 923 |
-
max_new_length = int(original_length *
|
|
|
|
|
|
|
|
|
|
|
|
|
| 924 |
|
| 925 |
with torch.no_grad():
|
| 926 |
outputs = self.model.generate(
|
|
@@ -928,9 +925,9 @@ class EnhancedDipperHumanizer:
|
|
| 928 |
max_length=max_new_length + 20,
|
| 929 |
min_length=max(5, int(original_length * 0.7)),
|
| 930 |
do_sample=True,
|
| 931 |
-
top_p=
|
| 932 |
-
temperature=
|
| 933 |
-
no_repeat_ngram_size=
|
| 934 |
num_beams=1, # Greedy for more randomness
|
| 935 |
early_stopping=True
|
| 936 |
)
|
|
@@ -967,6 +964,9 @@ class EnhancedDipperHumanizer:
|
|
| 967 |
# Join sentences back
|
| 968 |
result = ' '.join(paraphrased_sentences)
|
| 969 |
|
|
|
|
|
|
|
|
|
|
| 970 |
return result
|
| 971 |
|
| 972 |
def fix_incomplete_sentence_smart(self, generated, original):
|
|
@@ -1055,7 +1055,7 @@ class EnhancedDipperHumanizer:
|
|
| 1055 |
# Clean up sentences
|
| 1056 |
return [s for s in sentences if s and len(s.strip()) > 0]
|
| 1057 |
|
| 1058 |
-
def paraphrase_with_bart(self, text
|
| 1059 |
"""Additional paraphrasing with BART for more variation"""
|
| 1060 |
if not self.use_bart or not text or len(text.strip()) < 3:
|
| 1061 |
return text
|
|
@@ -1091,10 +1091,10 @@ class EnhancedDipperHumanizer:
|
|
| 1091 |
**inputs,
|
| 1092 |
max_length=int(original_length * 1.4) + 10,
|
| 1093 |
min_length=max(5, int(original_length * 0.6)),
|
| 1094 |
-
num_beams=
|
| 1095 |
-
temperature=
|
| 1096 |
do_sample=True,
|
| 1097 |
-
top_p=
|
| 1098 |
early_stopping=True
|
| 1099 |
)
|
| 1100 |
|
|
@@ -1116,8 +1116,7 @@ class EnhancedDipperHumanizer:
|
|
| 1116 |
print(f"Error in BART paraphrasing: {str(e)}")
|
| 1117 |
return text
|
| 1118 |
|
| 1119 |
-
def apply_sentence_variation(self, text
|
| 1120 |
-
short_sentence_threshold=10):
|
| 1121 |
"""Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
|
| 1122 |
sentences = self.split_into_sentences_advanced(text)
|
| 1123 |
varied_sentences = []
|
|
@@ -1133,7 +1132,7 @@ class EnhancedDipperHumanizer:
|
|
| 1133 |
current_length = len(words)
|
| 1134 |
|
| 1135 |
# Natural sentence length variation
|
| 1136 |
-
if last_sentence_length >
|
| 1137 |
# Break up if two long sentences in a row
|
| 1138 |
if ',' in sentence:
|
| 1139 |
parts = sentence.split(',', 1)
|
|
@@ -1148,8 +1147,8 @@ class EnhancedDipperHumanizer:
|
|
| 1148 |
|
| 1149 |
# Natural combinations for flow
|
| 1150 |
if (i < len(sentences) - 1 and
|
| 1151 |
-
current_length <
|
| 1152 |
-
len(sentences[i+1].split()) <
|
| 1153 |
|
| 1154 |
next_sent = sentences[i+1].strip()
|
| 1155 |
# Only combine if it makes semantic sense
|
|
@@ -1365,8 +1364,7 @@ class EnhancedDipperHumanizer:
|
|
| 1365 |
|
| 1366 |
return html_text
|
| 1367 |
|
| 1368 |
-
def add_natural_flow_variations(self, text
|
| 1369 |
-
thinking_prob=0.10):
|
| 1370 |
"""Add more natural flow and rhythm variations for Originality AI"""
|
| 1371 |
sentences = self.split_into_sentences_advanced(text)
|
| 1372 |
enhanced_sentences = []
|
|
@@ -1375,8 +1373,8 @@ class EnhancedDipperHumanizer:
|
|
| 1375 |
if not sentence.strip():
|
| 1376 |
continue
|
| 1377 |
|
| 1378 |
-
# Add stream-of-consciousness elements (
|
| 1379 |
-
if random.random() <
|
| 1380 |
stream_elements = [
|
| 1381 |
" - wait, let me back up - ",
|
| 1382 |
" - actually, scratch that - ",
|
|
@@ -1390,8 +1388,8 @@ class EnhancedDipperHumanizer:
|
|
| 1390 |
words.insert(pos, random.choice(stream_elements))
|
| 1391 |
sentence = ' '.join(words)
|
| 1392 |
|
| 1393 |
-
# Add human-like self-corrections (
|
| 1394 |
-
if random.random() <
|
| 1395 |
corrections = [
|
| 1396 |
" - or rather, ",
|
| 1397 |
" - well, actually, ",
|
|
@@ -1409,8 +1407,8 @@ class EnhancedDipperHumanizer:
|
|
| 1409 |
words.insert(pos, correction)
|
| 1410 |
sentence = ' '.join(words)
|
| 1411 |
|
| 1412 |
-
# Add thinking-out-loud patterns (
|
| 1413 |
-
if random.random() <
|
| 1414 |
thinking_patterns = [
|
| 1415 |
"Come to think of it, ",
|
| 1416 |
"Actually, you know what? ",
|
|
@@ -1428,45 +1426,11 @@ class EnhancedDipperHumanizer:
|
|
| 1428 |
|
| 1429 |
return ' '.join(enhanced_sentences)
|
| 1430 |
|
| 1431 |
-
def process_html(self, html_content, progress_callback=None
|
| 1432 |
-
"""Main processing function with progress callback
|
| 1433 |
if not html_content.strip():
|
| 1434 |
return "Please provide HTML content."
|
| 1435 |
|
| 1436 |
-
# Extract all parameters with defaults
|
| 1437 |
-
lex_diversity = kwargs.get('lex_diversity', 75)
|
| 1438 |
-
order_diversity = kwargs.get('order_diversity', 30)
|
| 1439 |
-
temperature = kwargs.get('temperature', 0.85)
|
| 1440 |
-
top_p = kwargs.get('top_p', 0.92)
|
| 1441 |
-
length_multiplier = kwargs.get('length_multiplier', 1.4)
|
| 1442 |
-
no_repeat_ngram = kwargs.get('no_repeat_ngram', 4)
|
| 1443 |
-
bart_usage_prob = kwargs.get('bart_usage_prob', 0.3)
|
| 1444 |
-
bart_temperature = kwargs.get('bart_temperature', 1.1)
|
| 1445 |
-
bart_top_p = kwargs.get('bart_top_p', 0.9)
|
| 1446 |
-
bart_beams = kwargs.get('bart_beams', 2)
|
| 1447 |
-
contraction_prob = kwargs.get('contraction_prob', 0.8)
|
| 1448 |
-
oxford_comma_prob = kwargs.get('oxford_comma_prob', 0.15)
|
| 1449 |
-
which_that_prob = kwargs.get('which_that_prob', 0.08)
|
| 1450 |
-
typo_prob = kwargs.get('typo_prob', 0.02)
|
| 1451 |
-
natural_error_prob = kwargs.get('natural_error_prob', 0.05)
|
| 1452 |
-
speech_pattern_prob = kwargs.get('speech_pattern_prob', 0.15)
|
| 1453 |
-
subtle_error_prob = kwargs.get('subtle_error_prob', 0.10)
|
| 1454 |
-
sentence_combine_prob = kwargs.get('sentence_combine_prob', 0.2)
|
| 1455 |
-
stream_conscious_prob = kwargs.get('stream_conscious_prob', 0.08)
|
| 1456 |
-
self_correction_prob = kwargs.get('self_correction_prob', 0.07)
|
| 1457 |
-
thinking_loud_prob = kwargs.get('thinking_loud_prob', 0.10)
|
| 1458 |
-
long_sentence_threshold = kwargs.get('long_sentence_threshold', 20)
|
| 1459 |
-
short_sentence_threshold = kwargs.get('short_sentence_threshold', 10)
|
| 1460 |
-
|
| 1461 |
-
# Initialize human variations with parameters
|
| 1462 |
-
self.human_variations = HumanLikeVariations(
|
| 1463 |
-
contraction_prob=contraction_prob,
|
| 1464 |
-
oxford_comma_prob=oxford_comma_prob,
|
| 1465 |
-
which_that_prob=which_that_prob,
|
| 1466 |
-
typo_prob=typo_prob,
|
| 1467 |
-
natural_error_prob=natural_error_prob
|
| 1468 |
-
)
|
| 1469 |
-
|
| 1470 |
# Store all script and style content to preserve it
|
| 1471 |
script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
|
| 1472 |
style_placeholder = "###STYLE_PLACEHOLDER_{}###"
|
|
@@ -1512,49 +1476,24 @@ class EnhancedDipperHumanizer:
|
|
| 1512 |
if len(original_text.split()) < 3:
|
| 1513 |
continue
|
| 1514 |
|
| 1515 |
-
# First pass with Dipper
|
| 1516 |
paraphrased_text = self.paraphrase_with_dipper(
|
| 1517 |
original_text,
|
| 1518 |
-
lex_diversity=
|
| 1519 |
-
order_diversity=
|
| 1520 |
-
temperature=temperature,
|
| 1521 |
-
top_p=top_p,
|
| 1522 |
-
length_multiplier=length_multiplier,
|
| 1523 |
-
no_repeat_ngram=no_repeat_ngram
|
| 1524 |
-
)
|
| 1525 |
-
|
| 1526 |
-
# Add natural human patterns with configured probabilities
|
| 1527 |
-
paraphrased_text = self.add_natural_human_patterns(
|
| 1528 |
-
paraphrased_text,
|
| 1529 |
-
speech_prob=speech_pattern_prob,
|
| 1530 |
-
error_prob=subtle_error_prob,
|
| 1531 |
-
combine_prob=sentence_combine_prob
|
| 1532 |
)
|
| 1533 |
|
| 1534 |
-
# Second pass with BART for longer texts (
|
| 1535 |
if self.use_bart and len(paraphrased_text.split()) > 8:
|
| 1536 |
-
|
| 1537 |
-
|
| 1538 |
-
|
| 1539 |
-
bart_temperature=bart_temperature,
|
| 1540 |
-
bart_top_p=bart_top_p,
|
| 1541 |
-
bart_beams=bart_beams
|
| 1542 |
-
)
|
| 1543 |
|
| 1544 |
-
# Apply sentence variation
|
| 1545 |
-
paraphrased_text = self.apply_sentence_variation(
|
| 1546 |
-
paraphrased_text,
|
| 1547 |
-
long_sentence_threshold=long_sentence_threshold,
|
| 1548 |
-
short_sentence_threshold=short_sentence_threshold
|
| 1549 |
-
)
|
| 1550 |
|
| 1551 |
-
# Add natural flow variations
|
| 1552 |
-
paraphrased_text = self.add_natural_flow_variations(
|
| 1553 |
-
paraphrased_text,
|
| 1554 |
-
stream_prob=stream_conscious_prob,
|
| 1555 |
-
correction_prob=self_correction_prob,
|
| 1556 |
-
thinking_prob=thinking_loud_prob
|
| 1557 |
-
)
|
| 1558 |
|
| 1559 |
# Fix punctuation and formatting
|
| 1560 |
paraphrased_text = self.fix_punctuation(paraphrased_text)
|
|
@@ -1719,24 +1658,8 @@ class EnhancedDipperHumanizer:
|
|
| 1719 |
# Initialize the humanizer
|
| 1720 |
humanizer = EnhancedDipperHumanizer()
|
| 1721 |
|
| 1722 |
-
def humanize_html(html_input,
|
| 1723 |
-
|
| 1724 |
-
lex_diversity=75, order_diversity=30,
|
| 1725 |
-
# Generation Parameters
|
| 1726 |
-
temperature=0.85, top_p=0.92, length_multiplier=1.4, no_repeat_ngram=4,
|
| 1727 |
-
# BART Parameters
|
| 1728 |
-
bart_usage_prob=0.3, bart_temperature=1.1, bart_top_p=0.9, bart_beams=2,
|
| 1729 |
-
# Human Variation Parameters
|
| 1730 |
-
contraction_prob=0.8, oxford_comma_prob=0.15, which_that_prob=0.08,
|
| 1731 |
-
typo_prob=0.02, natural_error_prob=0.05,
|
| 1732 |
-
# Human Pattern Frequencies
|
| 1733 |
-
speech_pattern_prob=0.15, subtle_error_prob=0.10, sentence_combine_prob=0.2,
|
| 1734 |
-
# Flow Variation Parameters
|
| 1735 |
-
stream_conscious_prob=0.08, self_correction_prob=0.07, thinking_loud_prob=0.10,
|
| 1736 |
-
# Sentence Variation Parameters
|
| 1737 |
-
long_sentence_threshold=20, short_sentence_threshold=10,
|
| 1738 |
-
progress=gr.Progress()):
|
| 1739 |
-
"""Gradio interface function with progress updates and all parameters"""
|
| 1740 |
if not html_input:
|
| 1741 |
return "Please provide HTML content to humanize."
|
| 1742 |
|
|
@@ -1748,33 +1671,10 @@ def humanize_html(html_input,
|
|
| 1748 |
if total > 0:
|
| 1749 |
progress(current / total, desc=f"Processing: {current}/{total} elements")
|
| 1750 |
|
| 1751 |
-
# Pass
|
| 1752 |
result = humanizer.process_html(
|
| 1753 |
html_input,
|
| 1754 |
-
progress_callback=progress_callback
|
| 1755 |
-
lex_diversity=lex_diversity,
|
| 1756 |
-
order_diversity=order_diversity,
|
| 1757 |
-
temperature=temperature,
|
| 1758 |
-
top_p=top_p,
|
| 1759 |
-
length_multiplier=length_multiplier,
|
| 1760 |
-
no_repeat_ngram=no_repeat_ngram,
|
| 1761 |
-
bart_usage_prob=bart_usage_prob,
|
| 1762 |
-
bart_temperature=bart_temperature,
|
| 1763 |
-
bart_top_p=bart_top_p,
|
| 1764 |
-
bart_beams=bart_beams,
|
| 1765 |
-
contraction_prob=contraction_prob,
|
| 1766 |
-
oxford_comma_prob=oxford_comma_prob,
|
| 1767 |
-
which_that_prob=which_that_prob,
|
| 1768 |
-
typo_prob=typo_prob,
|
| 1769 |
-
natural_error_prob=natural_error_prob,
|
| 1770 |
-
speech_pattern_prob=speech_pattern_prob,
|
| 1771 |
-
subtle_error_prob=subtle_error_prob,
|
| 1772 |
-
sentence_combine_prob=sentence_combine_prob,
|
| 1773 |
-
stream_conscious_prob=stream_conscious_prob,
|
| 1774 |
-
self_correction_prob=self_correction_prob,
|
| 1775 |
-
thinking_loud_prob=thinking_loud_prob,
|
| 1776 |
-
long_sentence_threshold=long_sentence_threshold,
|
| 1777 |
-
short_sentence_threshold=short_sentence_threshold
|
| 1778 |
)
|
| 1779 |
|
| 1780 |
processing_time = time.time() - start_time
|
|
@@ -1783,192 +1683,47 @@ def humanize_html(html_input,
|
|
| 1783 |
|
| 1784 |
return result
|
| 1785 |
|
| 1786 |
-
# Create Gradio interface with
|
| 1787 |
-
|
| 1788 |
-
|
| 1789 |
-
|
| 1790 |
-
|
| 1791 |
-
|
| 1792 |
-
|
| 1793 |
-
|
| 1794 |
-
|
| 1795 |
-
|
| 1796 |
-
|
| 1797 |
-
|
| 1798 |
-
|
| 1799 |
-
|
| 1800 |
-
|
| 1801 |
-
|
| 1802 |
-
|
| 1803 |
-
process_btn = gr.Button("Process HTML", variant="primary")
|
| 1804 |
-
|
| 1805 |
-
with gr.Column(scale=1):
|
| 1806 |
-
html_output = gr.Textbox(
|
| 1807 |
-
lines=10,
|
| 1808 |
-
label="Humanized HTML Output"
|
| 1809 |
-
)
|
| 1810 |
-
|
| 1811 |
-
with gr.Tabs():
|
| 1812 |
-
with gr.Tab("Diversity Settings"):
|
| 1813 |
-
gr.Markdown("**Controls how much the text is varied from the original**")
|
| 1814 |
-
lex_diversity = gr.Slider(0, 100, value=75, step=5,
|
| 1815 |
-
label="Lexical Diversity",
|
| 1816 |
-
info="Higher = more word variation (75 balanced, 90+ for max human score)")
|
| 1817 |
-
order_diversity = gr.Slider(0, 100, value=30, step=5,
|
| 1818 |
-
label="Order Diversity",
|
| 1819 |
-
info="Higher = more word reordering (30 balanced, 40+ for max human score)")
|
| 1820 |
-
|
| 1821 |
-
with gr.Tab("Generation Parameters"):
|
| 1822 |
-
gr.Markdown("**Fine-tune the AI model's text generation behavior**")
|
| 1823 |
-
temperature = gr.Slider(0.1, 2.0, value=0.85, step=0.05,
|
| 1824 |
-
label="Temperature",
|
| 1825 |
-
info="Higher = more randomness (0.85 balanced, 0.9+ for max human)")
|
| 1826 |
-
top_p = gr.Slider(0.1, 1.0, value=0.92, step=0.02,
|
| 1827 |
-
label="Top-p (nucleus sampling)",
|
| 1828 |
-
info="Higher = wider token selection (0.92 balanced, 0.95 for max human)")
|
| 1829 |
-
length_multiplier = gr.Slider(1.1, 2.0, value=1.4, step=0.1,
|
| 1830 |
-
label="Length Multiplier",
|
| 1831 |
-
info="How much longer/shorter output can be vs input")
|
| 1832 |
-
no_repeat_ngram = gr.Slider(2, 6, value=4, step=1,
|
| 1833 |
-
label="No Repeat N-gram Size",
|
| 1834 |
-
info="Prevents repetition of N-word phrases (4 is balanced)")
|
| 1835 |
-
|
| 1836 |
-
with gr.Tab("BART Parameters"):
|
| 1837 |
-
gr.Markdown("**Settings for secondary BART paraphrasing model**")
|
| 1838 |
-
bart_usage_prob = gr.Slider(0.0, 1.0, value=0.3, step=0.05,
|
| 1839 |
-
label="BART Usage Probability",
|
| 1840 |
-
info="Chance to use BART for additional variation")
|
| 1841 |
-
bart_temperature = gr.Slider(0.7, 1.5, value=1.1, step=0.05,
|
| 1842 |
-
label="BART Temperature",
|
| 1843 |
-
info="Temperature for BART model")
|
| 1844 |
-
bart_top_p = gr.Slider(0.8, 1.0, value=0.9, step=0.02,
|
| 1845 |
-
label="BART Top-p",
|
| 1846 |
-
info="Top-p for BART model")
|
| 1847 |
-
bart_beams = gr.Slider(1, 4, value=2, step=1,
|
| 1848 |
-
label="BART Beam Size",
|
| 1849 |
-
info="Number of beams for BART generation")
|
| 1850 |
-
|
| 1851 |
-
with gr.Tab("Human Variations"):
|
| 1852 |
-
gr.Markdown("**Control natural human-like writing patterns**")
|
| 1853 |
-
contraction_prob = gr.Slider(0.0, 1.0, value=0.8, step=0.05,
|
| 1854 |
-
label="Contraction Probability",
|
| 1855 |
-
info="Chance to use contractions (it's vs it is)")
|
| 1856 |
-
oxford_comma_prob = gr.Slider(0.0, 0.5, value=0.15, step=0.05,
|
| 1857 |
-
label="Oxford Comma Skip Probability",
|
| 1858 |
-
info="Chance to skip Oxford comma (human-like error)")
|
| 1859 |
-
which_that_prob = gr.Slider(0.0, 0.3, value=0.08, step=0.02,
|
| 1860 |
-
label="Which/That Substitution",
|
| 1861 |
-
info="Chance to use 'which' instead of 'that'")
|
| 1862 |
-
typo_prob = gr.Slider(0.0, 0.1, value=0.02, step=0.01,
|
| 1863 |
-
label="Typo Probability",
|
| 1864 |
-
info="Chance of natural typos per sentence")
|
| 1865 |
-
natural_error_prob = gr.Slider(0.0, 0.2, value=0.05, step=0.01,
|
| 1866 |
-
label="Natural Error Probability",
|
| 1867 |
-
info="Chance of human-like errors (missing commas, etc)")
|
| 1868 |
-
|
| 1869 |
-
with gr.Tab("Human Pattern Frequencies"):
|
| 1870 |
-
gr.Markdown("**Frequency of conversational elements**")
|
| 1871 |
-
speech_pattern_prob = gr.Slider(0.0, 0.5, value=0.15, step=0.05,
|
| 1872 |
-
label="Speech Pattern Probability",
|
| 1873 |
-
info="Chance to add 'you know', 'I mean', etc.")
|
| 1874 |
-
subtle_error_prob = gr.Slider(0.0, 0.3, value=0.10, step=0.05,
|
| 1875 |
-
label="Subtle Error Probability",
|
| 1876 |
-
info="Chance of subtle human errors")
|
| 1877 |
-
sentence_combine_prob = gr.Slider(0.0, 0.5, value=0.2, step=0.05,
|
| 1878 |
-
label="Sentence Combination Probability",
|
| 1879 |
-
info="Chance to naturally combine short sentences")
|
| 1880 |
-
|
| 1881 |
-
with gr.Tab("Flow Variations"):
|
| 1882 |
-
gr.Markdown("**Advanced human-like flow patterns**")
|
| 1883 |
-
stream_conscious_prob = gr.Slider(0.0, 0.3, value=0.08, step=0.02,
|
| 1884 |
-
label="Stream of Consciousness",
|
| 1885 |
-
info="Chance to add thinking interruptions")
|
| 1886 |
-
self_correction_prob = gr.Slider(0.0, 0.2, value=0.07, step=0.02,
|
| 1887 |
-
label="Self-Correction Probability",
|
| 1888 |
-
info="Chance to add 'or rather', 'I mean' corrections")
|
| 1889 |
-
thinking_loud_prob = gr.Slider(0.0, 0.3, value=0.10, step=0.02,
|
| 1890 |
-
label="Thinking Out Loud",
|
| 1891 |
-
info="Chance to add 'Come to think of it' patterns")
|
| 1892 |
-
|
| 1893 |
-
with gr.Tab("Sentence Structure"):
|
| 1894 |
-
gr.Markdown("**Control sentence length variation**")
|
| 1895 |
-
long_sentence_threshold = gr.Slider(10, 40, value=20, step=2,
|
| 1896 |
-
label="Long Sentence Threshold",
|
| 1897 |
-
info="Words count to consider sentence 'long'")
|
| 1898 |
-
short_sentence_threshold = gr.Slider(5, 15, value=10, step=1,
|
| 1899 |
-
label="Short Sentence Threshold",
|
| 1900 |
-
info="Words count to consider sentence 'short'")
|
| 1901 |
-
|
| 1902 |
-
with gr.Accordion("Preset Configurations", open=False):
|
| 1903 |
-
gr.Markdown("""
|
| 1904 |
-
### Quick Presets:
|
| 1905 |
-
- **Balanced (Default)**: Current settings - good quality with high human score
|
| 1906 |
-
- **Maximum Human**: Increase all diversity and variation parameters
|
| 1907 |
-
- **Quality Focus**: Decrease variation parameters for cleaner output
|
| 1908 |
-
- **Natural Flow**: Increase flow variations and speech patterns
|
| 1909 |
-
""")
|
| 1910 |
-
|
| 1911 |
-
preset_buttons = gr.Row()
|
| 1912 |
-
with preset_buttons:
|
| 1913 |
-
balanced_btn = gr.Button("Load Balanced", scale=1)
|
| 1914 |
-
max_human_btn = gr.Button("Load Max Human", scale=1)
|
| 1915 |
-
quality_btn = gr.Button("Load Quality Focus", scale=1)
|
| 1916 |
-
natural_btn = gr.Button("Load Natural Flow", scale=1)
|
| 1917 |
-
|
| 1918 |
-
# Define preset configurations
|
| 1919 |
-
def load_balanced():
|
| 1920 |
-
return [75, 30, 0.85, 0.92, 1.4, 4, 0.3, 1.1, 0.9, 2,
|
| 1921 |
-
0.8, 0.15, 0.08, 0.02, 0.05, 0.15, 0.10, 0.2,
|
| 1922 |
-
0.08, 0.07, 0.10, 20, 10]
|
| 1923 |
-
|
| 1924 |
-
def load_max_human():
|
| 1925 |
-
return [90, 40, 0.95, 0.95, 1.5, 4, 0.4, 1.2, 0.95, 2,
|
| 1926 |
-
0.9, 0.20, 0.12, 0.04, 0.08, 0.25, 0.15, 0.3,
|
| 1927 |
-
0.15, 0.10, 0.15, 20, 10]
|
| 1928 |
-
|
| 1929 |
-
def load_quality():
|
| 1930 |
-
return [65, 20, 0.75, 0.88, 1.3, 4, 0.2, 1.0, 0.85, 3,
|
| 1931 |
-
0.7, 0.10, 0.05, 0.01, 0.03, 0.08, 0.05, 0.15,
|
| 1932 |
-
0.03, 0.03, 0.05, 25, 8]
|
| 1933 |
-
|
| 1934 |
-
def load_natural():
|
| 1935 |
-
return [70, 25, 0.82, 0.90, 1.4, 4, 0.35, 1.1, 0.9, 2,
|
| 1936 |
-
0.85, 0.12, 0.06, 0.02, 0.04, 0.20, 0.12, 0.25,
|
| 1937 |
-
0.12, 0.10, 0.15, 18, 12]
|
| 1938 |
-
|
| 1939 |
-
# All parameter components for preset updates
|
| 1940 |
-
all_params = [
|
| 1941 |
-
lex_diversity, order_diversity, temperature, top_p, length_multiplier, no_repeat_ngram,
|
| 1942 |
-
bart_usage_prob, bart_temperature, bart_top_p, bart_beams,
|
| 1943 |
-
contraction_prob, oxford_comma_prob, which_that_prob, typo_prob, natural_error_prob,
|
| 1944 |
-
speech_pattern_prob, subtle_error_prob, sentence_combine_prob,
|
| 1945 |
-
stream_conscious_prob, self_correction_prob, thinking_loud_prob,
|
| 1946 |
-
long_sentence_threshold, short_sentence_threshold
|
| 1947 |
-
]
|
| 1948 |
|
| 1949 |
-
|
| 1950 |
-
|
| 1951 |
-
|
| 1952 |
-
|
| 1953 |
-
|
|
|
|
|
|
|
|
|
|
| 1954 |
|
| 1955 |
-
|
| 1956 |
-
process_btn.click(
|
| 1957 |
-
humanize_html,
|
| 1958 |
-
inputs=[html_input] + all_params,
|
| 1959 |
-
outputs=html_output
|
| 1960 |
-
)
|
| 1961 |
|
| 1962 |
-
|
| 1963 |
-
|
| 1964 |
-
|
|
|
|
| 1965 |
<h1>The Benefits of Regular Exercise</h1>
|
| 1966 |
<div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
|
| 1967 |
<p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
|
| 1968 |
<p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
|
| 1969 |
-
</article>"""]
|
| 1970 |
-
|
| 1971 |
-
|
|
|
|
| 1972 |
|
| 1973 |
if __name__ == "__main__":
|
| 1974 |
# Enable queue for better handling of long-running processes
|
|
|
|
| 25 |
class HumanLikeVariations:
|
| 26 |
"""Add human-like variations and intentional imperfections"""
|
| 27 |
|
| 28 |
+
def __init__(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
# Common human writing patterns - EXPANDED for Originality AI
|
| 30 |
self.casual_transitions = [
|
| 31 |
"So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
|
|
|
|
| 158 |
# Always use contractions where natural
|
| 159 |
sent = self.apply_contractions(sent)
|
| 160 |
|
| 161 |
+
# Add VERY occasional natural errors (5% chance)
|
| 162 |
+
if random.random() < 0.05 and len(sent.split()) > 15:
|
| 163 |
error_types = [
|
| 164 |
# Missing comma in compound sentence
|
| 165 |
lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
|
|
|
|
| 199 |
}
|
| 200 |
|
| 201 |
for full, contr in contractions.items():
|
| 202 |
+
if random.random() < 0.8: # 80% chance to apply each contraction
|
| 203 |
text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
|
| 204 |
|
| 205 |
return text
|
| 206 |
|
| 207 |
def add_minor_errors(self, text):
|
| 208 |
"""Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
|
| 209 |
+
# Occasionally miss Oxford comma (15% chance)
|
| 210 |
+
if random.random() < 0.15:
|
| 211 |
# Only in lists, not random commas
|
| 212 |
text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
|
| 213 |
|
| 214 |
+
# Sometimes use 'which' instead of 'that' (8% chance)
|
| 215 |
+
if random.random() < 0.08:
|
| 216 |
# Only for non-restrictive clauses
|
| 217 |
matches = re.finditer(r'\b(\w+) that (\w+)', text)
|
| 218 |
for match in list(matches)[:1]: # Only first occurrence
|
| 219 |
if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
|
| 220 |
text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
|
| 221 |
|
| 222 |
+
# NEW: Add very occasional typos (2% chance per sentence) - REDUCED AND CONTROLLED
|
| 223 |
sentences = text.split('. ')
|
| 224 |
for i, sent in enumerate(sentences):
|
| 225 |
+
if random.random() < 0.02 and len(sent.split()) > 15: # Only in longer sentences
|
| 226 |
words = sent.split()
|
| 227 |
# Pick a random word to potentially typo
|
| 228 |
word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
|
|
|
|
| 267 |
|
| 268 |
return text
|
| 269 |
|
| 270 |
+
def add_natural_human_patterns(self, text):
|
| 271 |
"""Add natural human writing patterns that Originality AI associates with human text"""
|
| 272 |
sentences = self.split_into_sentences_advanced(text)
|
| 273 |
result_sentences = []
|
|
|
|
| 279 |
# Natural contractions throughout
|
| 280 |
sentence = self.apply_contractions(sentence)
|
| 281 |
|
| 282 |
+
# Add natural speech patterns (15% chance)
|
| 283 |
+
if random.random() < 0.15 and len(sentence.split()) > 10:
|
| 284 |
# Natural interruptions that humans actually use
|
| 285 |
if random.random() < 0.5:
|
| 286 |
# Add "you know" or "I mean" naturally
|
|
|
|
| 297 |
openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
|
| 298 |
sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
|
| 299 |
|
| 300 |
+
# Add subtle errors that humans make (10% chance - reduced)
|
| 301 |
+
if random.random() < 0.10:
|
| 302 |
words = sentence.split()
|
| 303 |
if len(words) > 5:
|
| 304 |
# Common comma omissions
|
|
|
|
| 311 |
words.insert(idx+1, words[idx])
|
| 312 |
sentence = ' '.join(words)
|
| 313 |
|
| 314 |
+
# Natural sentence combinations (20% chance)
|
| 315 |
+
if i < len(sentences) - 1 and random.random() < 0.2:
|
| 316 |
next_sent = sentences[i+1].strip()
|
| 317 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 318 |
# Natural connectors based on content
|
|
|
|
| 544 |
except:
|
| 545 |
print("BART model not available")
|
| 546 |
self.use_bart = False
|
| 547 |
+
|
| 548 |
+
# Initialize human variations handler
|
| 549 |
+
self.human_variations = HumanLikeVariations()
|
| 550 |
|
| 551 |
+
def add_natural_human_patterns(self, text):
|
| 552 |
"""Add natural human writing patterns that Originality AI associates with human text"""
|
| 553 |
sentences = self.split_into_sentences_advanced(text)
|
| 554 |
result_sentences = []
|
|
|
|
| 560 |
# Natural contractions throughout
|
| 561 |
sentence = self.apply_contractions(sentence)
|
| 562 |
|
| 563 |
+
# Add natural speech patterns (15% chance - balanced)
|
| 564 |
+
if random.random() < 0.15 and len(sentence.split()) > 10:
|
| 565 |
# Natural interruptions that humans actually use
|
| 566 |
if random.random() < 0.5:
|
| 567 |
# Add "you know" or "I mean" naturally
|
|
|
|
| 578 |
openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
|
| 579 |
sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
|
| 580 |
|
| 581 |
+
# Add subtle errors that humans make (8% chance)
|
| 582 |
+
if random.random() < 0.08:
|
| 583 |
words = sentence.split()
|
| 584 |
if len(words) > 5:
|
| 585 |
# Common comma omissions
|
|
|
|
| 592 |
words.insert(idx+1, words[idx])
|
| 593 |
sentence = ' '.join(words)
|
| 594 |
|
| 595 |
+
# Natural sentence combinations (20% chance)
|
| 596 |
+
if i < len(sentences) - 1 and random.random() < 0.2:
|
| 597 |
next_sent = sentences[i+1].strip()
|
| 598 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 599 |
# Natural connectors based on content
|
|
|
|
| 860 |
|
| 861 |
return text.strip()
|
| 862 |
|
| 863 |
+
def paraphrase_with_dipper(self, text, lex_diversity=60, order_diversity=20):
|
|
|
|
|
|
|
| 864 |
"""Paraphrase text using Dipper model with sentence-level processing"""
|
| 865 |
if not text or len(text.strip()) < 3:
|
| 866 |
return text
|
|
|
|
| 878 |
continue
|
| 879 |
|
| 880 |
try:
|
| 881 |
+
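# BALANCED diversity for Originality AI, chosen by sentence length. NOTE: both branches below overwrite the lex_diversity/order_diversity arguments, so caller-supplied values are ignored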
|
| 882 |
if len(sentence.split()) < 10:
|
| 883 |
+
lex_diversity = 70 # High but not extreme
|
| 884 |
+
order_diversity = 25
|
|
|
|
| 885 |
else:
|
| 886 |
+
lex_diversity = 75 # Balanced diversity
|
| 887 |
+
order_diversity = 30 # Moderate order diversity
|
| 888 |
|
| 889 |
+
lex_code = int(100 - lex_diversity)
|
| 890 |
+
order_code = int(100 - order_diversity)
|
| 891 |
|
| 892 |
# Format input for Dipper
|
| 893 |
if self.is_dipper:
|
|
|
|
| 913 |
|
| 914 |
# Generate with appropriate variation
|
| 915 |
original_length = len(sentence.split())
|
| 916 |
+
max_new_length = int(original_length * 1.4)
|
| 917 |
+
|
| 918 |
+
# High variation parameters
|
| 919 |
+
temp = 0.85 # Slightly reduced from 0.9
|
| 920 |
+
top_p_val = 0.92 # Slightly reduced from 0.95
|
| 921 |
|
| 922 |
with torch.no_grad():
|
| 923 |
outputs = self.model.generate(
|
|
|
|
| 925 |
max_length=max_new_length + 20,
|
| 926 |
min_length=max(5, int(original_length * 0.7)),
|
| 927 |
do_sample=True,
|
| 928 |
+
top_p=top_p_val,
|
| 929 |
+
temperature=temp,
|
| 930 |
+
no_repeat_ngram_size=4, # Allow more repetition for naturalness
|
| 931 |
num_beams=1, # Single beam; with do_sample=True this is sampling (not greedy decoding), for more randomness
|
| 932 |
early_stopping=True
|
| 933 |
)
|
|
|
|
| 964 |
# Join sentences back
|
| 965 |
result = ' '.join(paraphrased_sentences)
|
| 966 |
|
| 967 |
+
# Apply natural human patterns
|
| 968 |
+
result = self.add_natural_human_patterns(result)
|
| 969 |
+
|
| 970 |
return result
|
| 971 |
|
| 972 |
def fix_incomplete_sentence_smart(self, generated, original):
|
|
|
|
| 1055 |
# Clean up sentences
|
| 1056 |
return [s for s in sentences if s and len(s.strip()) > 0]
|
| 1057 |
|
| 1058 |
+
def paraphrase_with_bart(self, text):
|
| 1059 |
"""Additional paraphrasing with BART for more variation"""
|
| 1060 |
if not self.use_bart or not text or len(text.strip()) < 3:
|
| 1061 |
return text
|
|
|
|
| 1091 |
**inputs,
|
| 1092 |
max_length=int(original_length * 1.4) + 10,
|
| 1093 |
min_length=max(5, int(original_length * 0.6)),
|
| 1094 |
+
num_beams=2,
|
| 1095 |
+
temperature=1.1, # Higher temperature
|
| 1096 |
do_sample=True,
|
| 1097 |
+
top_p=0.9,
|
| 1098 |
early_stopping=True
|
| 1099 |
)
|
| 1100 |
|
|
|
|
| 1116 |
print(f"Error in BART paraphrasing: {str(e)}")
|
| 1117 |
return text
|
| 1118 |
|
| 1119 |
+
def apply_sentence_variation(self, text):
|
|
|
|
| 1120 |
"""Apply natural sentence structure variations - HUMAN-LIKE FLOW"""
|
| 1121 |
sentences = self.split_into_sentences_advanced(text)
|
| 1122 |
varied_sentences = []
|
|
|
|
| 1132 |
current_length = len(words)
|
| 1133 |
|
| 1134 |
# Natural sentence length variation
|
| 1135 |
+
if last_sentence_length > 20 and current_length > 20:
|
| 1136 |
# Break up if two long sentences in a row
|
| 1137 |
if ',' in sentence:
|
| 1138 |
parts = sentence.split(',', 1)
|
|
|
|
| 1147 |
|
| 1148 |
# Natural combinations for flow
|
| 1149 |
if (i < len(sentences) - 1 and
|
| 1150 |
+
current_length < 10 and
|
| 1151 |
+
len(sentences[i+1].split()) < 10):
|
| 1152 |
|
| 1153 |
next_sent = sentences[i+1].strip()
|
| 1154 |
# Only combine if it makes semantic sense
|
|
|
|
| 1364 |
|
| 1365 |
return html_text
|
| 1366 |
|
| 1367 |
+
def add_natural_flow_variations(self, text):
|
|
|
|
| 1368 |
"""Add more natural flow and rhythm variations for Originality AI"""
|
| 1369 |
sentences = self.split_into_sentences_advanced(text)
|
| 1370 |
enhanced_sentences = []
|
|
|
|
| 1373 |
if not sentence.strip():
|
| 1374 |
continue
|
| 1375 |
|
| 1376 |
+
# Add stream-of-consciousness elements (8% chance - reduced)
|
| 1377 |
+
if random.random() < 0.08 and len(sentence.split()) > 10:
|
| 1378 |
stream_elements = [
|
| 1379 |
" - wait, let me back up - ",
|
| 1380 |
" - actually, scratch that - ",
|
|
|
|
| 1388 |
words.insert(pos, random.choice(stream_elements))
|
| 1389 |
sentence = ' '.join(words)
|
| 1390 |
|
| 1391 |
+
# Add human-like self-corrections (7% chance - reduced)
|
| 1392 |
+
if random.random() < 0.07:
|
| 1393 |
corrections = [
|
| 1394 |
" - or rather, ",
|
| 1395 |
" - well, actually, ",
|
|
|
|
| 1407 |
words.insert(pos, correction)
|
| 1408 |
sentence = ' '.join(words)
|
| 1409 |
|
| 1410 |
+
# Add thinking-out-loud patterns (10% chance - reduced)
|
| 1411 |
+
if random.random() < 0.10 and i > 0:
|
| 1412 |
thinking_patterns = [
|
| 1413 |
"Come to think of it, ",
|
| 1414 |
"Actually, you know what? ",
|
|
|
|
| 1426 |
|
| 1427 |
return ' '.join(enhanced_sentences)
|
| 1428 |
|
| 1429 |
+
def process_html(self, html_content, progress_callback=None):
|
| 1430 |
+
"""Main processing function with progress callback"""
|
| 1431 |
if not html_content.strip():
|
| 1432 |
return "Please provide HTML content."
|
| 1433 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1434 |
# Store all script and style content to preserve it
|
| 1435 |
script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
|
| 1436 |
style_placeholder = "###STYLE_PLACEHOLDER_{}###"
|
|
|
|
| 1476 |
if len(original_text.split()) < 3:
|
| 1477 |
continue
|
| 1478 |
|
| 1479 |
+
# First pass with Dipper
|
| 1480 |
paraphrased_text = self.paraphrase_with_dipper(
|
| 1481 |
original_text,
|
| 1482 |
+
lex_diversity=60,
|
| 1483 |
+
order_diversity=20
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1484 |
)
|
| 1485 |
|
| 1486 |
+
# Second pass with BART for longer texts (balanced probability)
|
| 1487 |
if self.use_bart and len(paraphrased_text.split()) > 8:
|
| 1488 |
+
# 30% chance to use BART for more variation (balanced)
|
| 1489 |
+
if random.random() < 0.3:
|
| 1490 |
+
paraphrased_text = self.paraphrase_with_bart(paraphrased_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1491 |
|
| 1492 |
+
# Apply sentence variation
|
| 1493 |
+
paraphrased_text = self.apply_sentence_variation(paraphrased_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1494 |
|
| 1495 |
+
# Add natural flow variations
|
| 1496 |
+
paraphrased_text = self.add_natural_flow_variations(paraphrased_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1497 |
|
| 1498 |
# Fix punctuation and formatting
|
| 1499 |
paraphrased_text = self.fix_punctuation(paraphrased_text)
|
|
|
|
| 1658 |
# Initialize the humanizer
|
| 1659 |
humanizer = EnhancedDipperHumanizer()
|
| 1660 |
|
| 1661 |
+
def humanize_html(html_input, progress=gr.Progress()):
|
| 1662 |
+
"""Gradio interface function with progress updates"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1663 |
if not html_input:
|
| 1664 |
return "Please provide HTML content to humanize."
|
| 1665 |
|
|
|
|
| 1671 |
if total > 0:
|
| 1672 |
progress(current / total, desc=f"Processing: {current}/{total} elements")
|
| 1673 |
|
| 1674 |
+
# Pass progress callback to process_html
|
| 1675 |
result = humanizer.process_html(
|
| 1676 |
html_input,
|
| 1677 |
+
progress_callback=progress_callback
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1678 |
)
|
| 1679 |
|
| 1680 |
processing_time = time.time() - start_time
|
|
|
|
| 1683 |
|
| 1684 |
return result
|
| 1685 |
|
| 1686 |
+
# Create Gradio interface with queue
|
| 1687 |
+
iface = gr.Interface(
|
| 1688 |
+
fn=humanize_html,
|
| 1689 |
+
inputs=[
|
| 1690 |
+
gr.Textbox(
|
| 1691 |
+
lines=10,
|
| 1692 |
+
placeholder="Paste your HTML content here...",
|
| 1693 |
+
label="HTML Input"
|
| 1694 |
+
)
|
| 1695 |
+
],
|
| 1696 |
+
outputs=gr.Textbox(
|
| 1697 |
+
lines=10,
|
| 1698 |
+
label="Humanized HTML Output"
|
| 1699 |
+
),
|
| 1700 |
+
title="Enhanced Dipper AI Humanizer - Optimized for Originality AI",
|
| 1701 |
+
description="""
|
| 1702 |
+
Ultra-aggressive humanizer optimized to achieve 100% human scores on both Undetectable AI and Originality AI.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1703 |
|
| 1704 |
+
Key Features:
|
| 1705 |
+
- High diversity settings (70-75% lexical, 25-30% order) for natural variation
|
| 1706 |
+
- Enhanced human patterns: personal opinions, self-corrections, thinking-out-loud
|
| 1707 |
+
- Natural typos, contractions, and conversational flow
|
| 1708 |
+
- Stream-of-consciousness elements and rhetorical questions
|
| 1709 |
+
- Originality AI-specific optimizations: varied sentence starters, emphatic repetitions
|
| 1710 |
+
- Skips content in <strong>, <b>, and heading tags (including inside tables)
|
| 1711 |
+
- Designed to pass the strictest AI detection systems
|
| 1712 |
|
| 1713 |
+
The tool creates genuinely human-like writing patterns that fool even the most sophisticated detectors!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1714 |
|
| 1715 |
+
⚠️ Note: Processing may take 5-10 minutes for large HTML documents.
|
| 1716 |
+
""",
|
| 1717 |
+
examples=[
|
| 1718 |
+
["""<article>
|
| 1719 |
<h1>The Benefits of Regular Exercise</h1>
|
| 1720 |
<div class="author-intro">By John Doe, Fitness Expert | 10 years experience</div>
|
| 1721 |
<p>Regular exercise is essential for maintaining good health. It helps improve cardiovascular fitness, strengthens muscles, and enhances mental well-being. Studies have shown that people who exercise regularly have lower risks of chronic diseases.</p>
|
| 1722 |
<p>Additionally, exercise can boost mood and energy levels. It releases endorphins, which are natural mood elevators. Even moderate activities like walking can make a significant difference in overall health.</p>
|
| 1723 |
+
</article>"""]
|
| 1724 |
+
],
|
| 1725 |
+
theme="default"
|
| 1726 |
+
)
|
| 1727 |
|
| 1728 |
if __name__ == "__main__":
|
| 1729 |
# Enable queue for better handling of long-running processes
|