Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -144,7 +144,7 @@ class HumanLikeVariations:
|
|
| 144 |
]
|
| 145 |
|
| 146 |
def add_human_touch(self, text):
|
| 147 |
-
"""Add subtle human-like imperfections - MORE
|
| 148 |
sentences = text.split('. ')
|
| 149 |
modified_sentences = []
|
| 150 |
|
|
@@ -152,80 +152,115 @@ class HumanLikeVariations:
|
|
| 152 |
if not sent.strip():
|
| 153 |
continue
|
| 154 |
|
| 155 |
-
#
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
sent = transition + sent[0].lower() + sent[1:] if len(sent) > 1 else sent
|
| 159 |
|
| 160 |
-
# Add filler words occasionally (20% chance -
|
| 161 |
-
if random.random() < 0.2 and len(
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
for
|
| 166 |
-
if len(words)
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
sent = starter + " " + sent[0].lower() + sent[1:] if len(sent) > 1 else sent
|
| 176 |
|
| 177 |
-
# Occasionally use contractions (35% chance
|
| 178 |
if random.random() < 0.35:
|
| 179 |
sent = self.apply_contractions(sent)
|
| 180 |
|
| 181 |
-
# Add occasional comma splices (10% chance) -
|
| 182 |
-
if random.random() < 0.1 and ',' in sent and len(
|
| 183 |
-
#
|
| 184 |
parts = sent.split(', ')
|
| 185 |
-
if len(parts)
|
| 186 |
-
|
| 187 |
-
parts[
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
"
|
| 218 |
-
|
| 219 |
-
"
|
| 220 |
-
|
| 221 |
-
"Sound familiar?"
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
"Following along?",
|
| 225 |
-
"Crazy, right?",
|
| 226 |
-
"Wild, isn't it?"
|
| 227 |
-
]
|
| 228 |
-
sent = sent + " " + random.choice(rhetorical_questions)
|
| 229 |
|
| 230 |
modified_sentences.append(sent)
|
| 231 |
|
|
@@ -261,27 +296,41 @@ class HumanLikeVariations:
|
|
| 261 |
return text
|
| 262 |
|
| 263 |
def add_minor_errors(self, text):
|
| 264 |
-
"""Add very minor, human-like errors - MORE REALISTIC"""
|
| 265 |
# Occasionally miss Oxford comma (15% chance)
|
| 266 |
if random.random() < 0.15:
|
| 267 |
-
|
|
|
|
| 268 |
|
| 269 |
# Sometimes use 'which' instead of 'that' (8% chance)
|
| 270 |
if random.random() < 0.08:
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
-
# NEW: Add very occasional typos (
|
| 274 |
sentences = text.split('. ')
|
| 275 |
for i, sent in enumerate(sentences):
|
| 276 |
-
if random.random() < 0.
|
| 277 |
words = sent.split()
|
| 278 |
# Pick a random word to potentially typo
|
| 279 |
-
word_idx = random.randint(
|
| 280 |
word = words[word_idx].lower()
|
| 281 |
|
| 282 |
-
# Only typo common words
|
| 283 |
-
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
# Preserve original capitalization
|
| 286 |
if words[word_idx][0].isupper():
|
| 287 |
typo = typo[0].upper() + typo[1:]
|
|
@@ -290,28 +339,22 @@ class HumanLikeVariations:
|
|
| 290 |
|
| 291 |
text = '. '.join(sentences)
|
| 292 |
|
| 293 |
-
#
|
| 294 |
-
if random.random() < 0.02:
|
| 295 |
-
words = text.split()
|
| 296 |
-
if len(words) > 20:
|
| 297 |
-
# Pick a small common word to double
|
| 298 |
-
small_words = ['the', 'a', 'an', 'is', 'was', 'are', 'were', 'to', 'of', 'in', 'on']
|
| 299 |
-
for idx, word in enumerate(words):
|
| 300 |
-
if word.lower() in small_words and random.random() < 0.1:
|
| 301 |
-
words[idx] = word + ' ' + word
|
| 302 |
-
break
|
| 303 |
-
text = ' '.join(words)
|
| 304 |
|
| 305 |
-
#
|
| 306 |
-
if random.random() < 0.
|
| 307 |
-
|
| 308 |
-
('
|
| 309 |
-
('
|
| 310 |
]
|
| 311 |
-
for pair in
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
|
| 316 |
return text
|
| 317 |
|
|
@@ -1161,7 +1204,7 @@ class EnhancedDipperHumanizer:
|
|
| 1161 |
return text
|
| 1162 |
|
| 1163 |
def apply_sentence_variation(self, text):
|
| 1164 |
-
"""Apply natural sentence structure variations - MORE
|
| 1165 |
sentences = self.split_into_sentences_advanced(text)
|
| 1166 |
varied_sentences = []
|
| 1167 |
|
|
@@ -1170,89 +1213,143 @@ class EnhancedDipperHumanizer:
|
|
| 1170 |
if not sentence.strip():
|
| 1171 |
continue
|
| 1172 |
|
| 1173 |
-
|
| 1174 |
-
|
|
|
|
| 1175 |
if (i < len(sentences) - 1 and
|
| 1176 |
-
len(
|
| 1177 |
len(sentences[i+1].split()) < 15 and
|
| 1178 |
random.random() < 0.5):
|
| 1179 |
|
| 1180 |
-
connectors = [', and', ', but', '; however,', '. Also,', '. Plus,', ', so', ', which means',
|
| 1181 |
-
' - and', ' - but', '; meanwhile,', '. That said,', ', yet', ' - though']
|
| 1182 |
-
connector = random.choice(connectors)
|
| 1183 |
-
|
| 1184 |
-
# Handle the next sentence properly
|
| 1185 |
next_sent = sentences[i+1].strip()
|
| 1186 |
if next_sent:
|
| 1187 |
-
|
| 1188 |
-
|
| 1189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1190 |
|
| 1191 |
elif sentence: # Only process non-empty sentences
|
| 1192 |
-
# Split very long sentences more
|
| 1193 |
-
if len(
|
| 1194 |
-
|
| 1195 |
-
|
| 1196 |
-
|
| 1197 |
-
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1204 |
else:
|
| 1205 |
varied_sentences.append(sentence)
|
| 1206 |
else:
|
| 1207 |
-
# Add natural variations more often (35% chance)
|
| 1208 |
if i > 0 and random.random() < 0.35:
|
| 1209 |
-
#
|
| 1210 |
-
|
| 1211 |
-
|
| 1212 |
-
|
| 1213 |
-
|
| 1214 |
-
|
| 1215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1216 |
|
| 1217 |
-
# Add mid-sentence interruptions (10% chance)
|
| 1218 |
-
if random.random() < 0.1 and len(
|
| 1219 |
-
|
| 1220 |
-
|
| 1221 |
-
|
| 1222 |
-
|
| 1223 |
-
|
| 1224 |
-
|
| 1225 |
-
|
| 1226 |
-
|
| 1227 |
-
|
| 1228 |
-
|
| 1229 |
-
|
| 1230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1231 |
|
| 1232 |
varied_sentences.append(sentence)
|
| 1233 |
|
| 1234 |
# Post-process for additional human patterns
|
| 1235 |
result = ' '.join([s for s in varied_sentences if s])
|
| 1236 |
|
| 1237 |
-
# Add occasional fragments for human touch (5% chance)
|
| 1238 |
-
if random.random() < 0.05:
|
| 1239 |
-
fragments = [
|
| 1240 |
-
"Crazy, I know.",
|
| 1241 |
-
"Wild stuff.",
|
| 1242 |
-
"Makes you think.",
|
| 1243 |
-
"Pretty interesting.",
|
| 1244 |
-
"Go figure.",
|
| 1245 |
-
"Who knew?",
|
| 1246 |
-
"There you have it.",
|
| 1247 |
-
"Food for thought.",
|
| 1248 |
-
"Just saying.",
|
| 1249 |
-
"Worth considering."
|
| 1250 |
-
]
|
| 1251 |
sentences = result.split('. ')
|
| 1252 |
-
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1256 |
|
| 1257 |
return result
|
| 1258 |
|
|
|
|
| 144 |
]
|
| 145 |
|
| 146 |
def add_human_touch(self, text):
|
| 147 |
+
"""Add subtle human-like imperfections - MORE CONTEXT-AWARE"""
|
| 148 |
sentences = text.split('. ')
|
| 149 |
modified_sentences = []
|
| 150 |
|
|
|
|
| 152 |
if not sent.strip():
|
| 153 |
continue
|
| 154 |
|
| 155 |
+
# Parse sentence structure for better filler placement
|
| 156 |
+
words = sent.split()
|
| 157 |
+
if not words:
|
| 158 |
+
continue
|
| 159 |
+
|
| 160 |
+
# Occasionally start with casual transition (25% chance)
|
| 161 |
+
if i > 0 and random.random() < 0.25 and len(words) > 5:
|
| 162 |
+
# Choose transitions based on sentence type
|
| 163 |
+
if any(q in sent.lower() for q in ['why', 'how', 'what', 'when', 'where']):
|
| 164 |
+
# Question-appropriate transitions
|
| 165 |
+
transition = random.choice(["So, ", "Well, ", "Now, ", "Okay, ", "Right, "])
|
| 166 |
+
elif any(w in sent.lower() for w in ['however', 'but', 'although', 'despite']):
|
| 167 |
+
# Contrast-appropriate transitions
|
| 168 |
+
transition = random.choice(["Still, ", "Yet, ", "Even so, ", "That said, ", "Nonetheless, "])
|
| 169 |
+
else:
|
| 170 |
+
# General transitions
|
| 171 |
+
transition = random.choice(self.casual_transitions[:20]) # Use more common ones
|
| 172 |
+
|
| 173 |
sent = transition + sent[0].lower() + sent[1:] if len(sent) > 1 else sent
|
| 174 |
|
| 175 |
+
# Add filler words occasionally (20% chance) - SMARTER PLACEMENT
|
| 176 |
+
if random.random() < 0.2 and len(words) > 8:
|
| 177 |
+
# Find good positions for fillers (after verbs, before adjectives, etc.)
|
| 178 |
+
good_positions = []
|
| 179 |
+
|
| 180 |
+
for idx, word in enumerate(words):
|
| 181 |
+
if idx > 0 and idx < len(words) - 1:
|
| 182 |
+
# After "is/are/was/were" (good for "really", "actually", etc.)
|
| 183 |
+
if word.lower() in ['is', 'are', 'was', 'were', 'been', 'be']:
|
| 184 |
+
good_positions.append(idx + 1)
|
| 185 |
+
# Before adjectives (good for "quite", "rather", etc.)
|
| 186 |
+
elif words[idx-1].lower() in ['a', 'an', 'the', 'very', 'so']:
|
| 187 |
+
good_positions.append(idx)
|
| 188 |
+
# After "can/could/will/would" (good for "probably", "definitely", etc.)
|
| 189 |
+
elif word.lower() in ['can', 'could', 'will', 'would', 'should', 'might', 'may']:
|
| 190 |
+
good_positions.append(idx + 1)
|
| 191 |
+
|
| 192 |
+
if good_positions:
|
| 193 |
+
insert_pos = random.choice(good_positions)
|
| 194 |
+
# Choose appropriate filler based on context
|
| 195 |
+
if words[insert_pos-1].lower() in ['is', 'are', 'was', 'were']:
|
| 196 |
+
filler = random.choice(['really', 'actually', 'definitely', 'certainly', 'quite'])
|
| 197 |
+
elif words[insert_pos-1].lower() in ['can', 'could', 'will', 'would']:
|
| 198 |
+
filler = random.choice(['probably', 'definitely', 'certainly', 'likely', 'possibly'])
|
| 199 |
+
else:
|
| 200 |
+
filler = random.choice(['quite', 'rather', 'pretty', 'fairly', 'somewhat'])
|
| 201 |
+
|
| 202 |
+
words.insert(insert_pos, filler)
|
| 203 |
+
sent = ' '.join(words)
|
| 204 |
+
|
| 205 |
+
# Add varied sentence starters (15% chance) - MORE LOGICAL
|
| 206 |
+
if i > 0 and random.random() < 0.15 and len(words) > 10:
|
| 207 |
+
# Choose starters based on sentence content
|
| 208 |
+
if any(w in sent.lower() for w in ['research', 'study', 'data', 'evidence']):
|
| 209 |
+
starter = random.choice(["Research shows", "Studies indicate", "Evidence suggests", "Data reveals"])
|
| 210 |
+
elif any(w in sent.lower() for w in ['important', 'crucial', 'vital', 'essential']):
|
| 211 |
+
starter = random.choice(["It's worth noting that", "Keep in mind", "Bear in mind that", "The key here is"])
|
| 212 |
+
else:
|
| 213 |
+
starter = random.choice(["When it comes to", "As for", "Regarding", "In terms of"])
|
| 214 |
+
|
| 215 |
sent = starter + " " + sent[0].lower() + sent[1:] if len(sent) > 1 else sent
|
| 216 |
|
| 217 |
+
# Occasionally use contractions (35% chance)
|
| 218 |
if random.random() < 0.35:
|
| 219 |
sent = self.apply_contractions(sent)
|
| 220 |
|
| 221 |
+
# Add occasional comma splices (10% chance) - ONLY WHERE IT MAKES SENSE
|
| 222 |
+
if random.random() < 0.1 and ',' in sent and len(words) > 10:
|
| 223 |
+
# Only do this with independent clauses
|
| 224 |
parts = sent.split(', ')
|
| 225 |
+
if len(parts) == 2:
|
| 226 |
+
# Check if both parts could be sentences
|
| 227 |
+
if (len(parts[0].split()) > 4 and len(parts[1].split()) > 4 and
|
| 228 |
+
any(v in parts[1].lower().split()[:3] for v in ['it', 'this', 'that', 'they', 'we', 'i', 'you'])):
|
| 229 |
+
sent = parts[0] + ', ' + parts[1] # Keep the comma splice
|
| 230 |
+
|
| 231 |
+
# NEW: Add parenthetical thoughts (8% chance) - CONTEXT-AWARE
|
| 232 |
+
if random.random() < 0.08 and len(words) > 15:
|
| 233 |
+
# Find natural break points (after complete thoughts)
|
| 234 |
+
break_points = []
|
| 235 |
+
for idx, word in enumerate(words):
|
| 236 |
+
if idx > len(words)//3 and idx < 2*len(words)//3:
|
| 237 |
+
if word.endswith(',') or words[idx-1].lower() in ['is', 'are', 'was', 'were']:
|
| 238 |
+
break_points.append(idx)
|
| 239 |
+
|
| 240 |
+
if break_points:
|
| 241 |
+
insert_pos = random.choice(break_points)
|
| 242 |
+
# Choose relevant parenthetical
|
| 243 |
+
if any(w in sent.lower() for w in ['surprising', 'interesting', 'amazing']):
|
| 244 |
+
parenthetical = random.choice(["(and that's saying something)", "(believe it or not)", "(surprisingly enough)"])
|
| 245 |
+
elif any(w in sent.lower() for w in ['obvious', 'clear', 'evident']):
|
| 246 |
+
parenthetical = random.choice(["(obviously)", "(clearly)", "(of course)"])
|
| 247 |
+
else:
|
| 248 |
+
parenthetical = random.choice(["(which makes sense)", "(for good reason)", "(as you'd expect)"])
|
| 249 |
+
|
| 250 |
+
words.insert(insert_pos, parenthetical)
|
| 251 |
+
sent = ' '.join(words)
|
| 252 |
+
|
| 253 |
+
# NEW: Occasionally add rhetorical questions (5% chance) - ONLY AT PARAGRAPH ENDS
|
| 254 |
+
if random.random() < 0.05 and i == len(sentences) - 1:
|
| 255 |
+
# Choose question based on sentence content
|
| 256 |
+
if any(w in sent.lower() for w in ['amazing', 'incredible', 'fantastic']):
|
| 257 |
+
question = random.choice(["Pretty cool, right?", "Amazing, isn't it?", "Impressive, huh?"])
|
| 258 |
+
elif any(w in sent.lower() for w in ['important', 'crucial', 'essential']):
|
| 259 |
+
question = random.choice(["Makes sense, right?", "See what I mean?", "Important to remember, yeah?"])
|
| 260 |
+
else:
|
| 261 |
+
question = random.choice(["Interesting, right?", "Makes you think, doesn't it?", "Sound familiar?"])
|
| 262 |
+
|
| 263 |
+
sent = sent + " " + question
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
|
| 265 |
modified_sentences.append(sent)
|
| 266 |
|
|
|
|
| 296 |
return text
|
| 297 |
|
| 298 |
def add_minor_errors(self, text):
|
| 299 |
+
"""Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
|
| 300 |
# Occasionally miss Oxford comma (15% chance)
|
| 301 |
if random.random() < 0.15:
|
| 302 |
+
# Only in lists, not random commas
|
| 303 |
+
text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
|
| 304 |
|
| 305 |
# Sometimes use 'which' instead of 'that' (8% chance)
|
| 306 |
if random.random() < 0.08:
|
| 307 |
+
# Only for non-restrictive clauses
|
| 308 |
+
matches = re.finditer(r'\b(\w+) that (\w+)', text)
|
| 309 |
+
for match in list(matches)[:1]: # Only first occurrence
|
| 310 |
+
if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
|
| 311 |
+
text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
|
| 312 |
|
| 313 |
+
# NEW: Add very occasional typos (2% chance per sentence) - REDUCED AND CONTROLLED
|
| 314 |
sentences = text.split('. ')
|
| 315 |
for i, sent in enumerate(sentences):
|
| 316 |
+
if random.random() < 0.02 and len(sent.split()) > 15: # Only in longer sentences
|
| 317 |
words = sent.split()
|
| 318 |
# Pick a random word to potentially typo
|
| 319 |
+
word_idx = random.randint(len(words)//2, len(words)-2) # Avoid start/end
|
| 320 |
word = words[word_idx].lower()
|
| 321 |
|
| 322 |
+
# Only typo common words where typo won't break meaning
|
| 323 |
+
safe_typos = {
|
| 324 |
+
'the': 'teh',
|
| 325 |
+
'and': 'adn',
|
| 326 |
+
'that': 'taht',
|
| 327 |
+
'with': 'wtih',
|
| 328 |
+
'from': 'form',
|
| 329 |
+
'because': 'becuase'
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
if word in safe_typos and random.random() < 0.5:
|
| 333 |
+
typo = safe_typos[word]
|
| 334 |
# Preserve original capitalization
|
| 335 |
if words[word_idx][0].isupper():
|
| 336 |
typo = typo[0].upper() + typo[1:]
|
|
|
|
| 339 |
|
| 340 |
text = '. '.join(sentences)
|
| 341 |
|
| 342 |
+
# Skip double words - too distracting
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
|
| 344 |
+
# Mix up common homophones occasionally (2% chance) - ONLY SAFE ONES
|
| 345 |
+
if random.random() < 0.02:
|
| 346 |
+
safe_homophones = [
|
| 347 |
+
('its', "it's"), # Very common mistake
|
| 348 |
+
('your', "you're"), # Another common one
|
| 349 |
]
|
| 350 |
+
for pair in safe_homophones:
|
| 351 |
+
# Check context to avoid breaking meaning
|
| 352 |
+
if f" {pair[0]} " in text and random.random() < 0.3:
|
| 353 |
+
# Find one instance and check it's safe to replace
|
| 354 |
+
pattern = rf'\b{pair[0]}\s+(\w+ing|\w+ed)\b' # its + verb = likely should be it's
|
| 355 |
+
if re.search(pattern, text):
|
| 356 |
+
text = re.sub(pattern, f"{pair[1]} \\1", text, count=1)
|
| 357 |
+
break
|
| 358 |
|
| 359 |
return text
|
| 360 |
|
|
|
|
| 1204 |
return text
|
| 1205 |
|
| 1206 |
def apply_sentence_variation(self, text):
|
| 1207 |
+
"""Apply natural sentence structure variations - MORE INTELLIGENT"""
|
| 1208 |
sentences = self.split_into_sentences_advanced(text)
|
| 1209 |
varied_sentences = []
|
| 1210 |
|
|
|
|
| 1213 |
if not sentence.strip():
|
| 1214 |
continue
|
| 1215 |
|
| 1216 |
+
words = sentence.split()
|
| 1217 |
+
|
| 1218 |
+
# Combine short sentences more often (50% chance) - BUT SMARTLY
|
| 1219 |
if (i < len(sentences) - 1 and
|
| 1220 |
+
len(words) < 15 and
|
| 1221 |
len(sentences[i+1].split()) < 15 and
|
| 1222 |
random.random() < 0.5):
|
| 1223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1224 |
next_sent = sentences[i+1].strip()
|
| 1225 |
if next_sent:
|
| 1226 |
+
# Check if sentences are related (share common words or themes)
|
| 1227 |
+
current_words = set(w.lower() for w in words if len(w) > 3)
|
| 1228 |
+
next_words = set(w.lower() for w in next_sent.split() if len(w) > 3)
|
| 1229 |
+
|
| 1230 |
+
# Only combine if they share context or one follows from the other
|
| 1231 |
+
if current_words & next_words or any(w in next_sent.lower() for w in ['this', 'that', 'these', 'those', 'it']):
|
| 1232 |
+
# Choose appropriate connector based on relationship
|
| 1233 |
+
if any(w in next_sent.lower().split()[:3] for w in ['however', 'but', 'yet', 'although']):
|
| 1234 |
+
connector = random.choice([', but', '; however,', ', yet', ' - though'])
|
| 1235 |
+
elif any(w in next_sent.lower().split()[:3] for w in ['therefore', 'thus', 'so', 'hence']):
|
| 1236 |
+
connector = random.choice([', so', '. Therefore,', ', which means', ' - thus'])
|
| 1237 |
+
elif any(w in next_sent.lower().split()[:3] for w in ['also', 'additionally', 'furthermore']):
|
| 1238 |
+
connector = random.choice([', and', '. Also,', '. Plus,', ' - additionally,'])
|
| 1239 |
+
else:
|
| 1240 |
+
connector = random.choice([', and', ', which', ' - '])
|
| 1241 |
+
|
| 1242 |
+
combined = f"{sentence.rstrip('.')}{connector} {next_sent[0].lower()}{next_sent[1:]}"
|
| 1243 |
+
varied_sentences.append(combined)
|
| 1244 |
+
sentences[i+1] = "" # Mark as processed
|
| 1245 |
+
else:
|
| 1246 |
+
varied_sentences.append(sentence)
|
| 1247 |
+
else:
|
| 1248 |
+
varied_sentences.append(sentence)
|
| 1249 |
|
| 1250 |
elif sentence: # Only process non-empty sentences
|
| 1251 |
+
# Split very long sentences more intelligently
|
| 1252 |
+
if len(words) > 18:
|
| 1253 |
+
# Look for natural break points
|
| 1254 |
+
break_words = ['however', 'therefore', 'moreover', 'furthermore', 'additionally', 'consequently']
|
| 1255 |
+
conjunctions = [', and', ', but', ', so', ', yet', ', for', ', or', ', nor']
|
| 1256 |
+
|
| 1257 |
+
# Find the best break point
|
| 1258 |
+
best_break = -1
|
| 1259 |
+
for idx, word in enumerate(words):
|
| 1260 |
+
if word.lower().rstrip(',') in break_words and idx > len(words)//3:
|
| 1261 |
+
best_break = idx
|
| 1262 |
+
break
|
| 1263 |
+
|
| 1264 |
+
# If no break word found, look for conjunctions
|
| 1265 |
+
if best_break == -1:
|
| 1266 |
+
text_lower = sentence.lower()
|
| 1267 |
+
for conj in conjunctions:
|
| 1268 |
+
if conj in text_lower:
|
| 1269 |
+
# Find position in word list
|
| 1270 |
+
conj_pos = text_lower.find(conj)
|
| 1271 |
+
word_count = len(text_lower[:conj_pos].split())
|
| 1272 |
+
if len(words)//3 < word_count < 2*len(words)//3:
|
| 1273 |
+
best_break = word_count
|
| 1274 |
+
break
|
| 1275 |
+
|
| 1276 |
+
# Split if good break point found
|
| 1277 |
+
if best_break > 0 and random.random() < 0.7:
|
| 1278 |
+
part1 = ' '.join(words[:best_break])
|
| 1279 |
+
part2 = ' '.join(words[best_break:])
|
| 1280 |
+
|
| 1281 |
+
# Clean up punctuation
|
| 1282 |
+
part1 = part1.rstrip(',') + '.'
|
| 1283 |
+
# Capitalize second part appropriately
|
| 1284 |
+
if part2 and part2[0].islower() and not part2.startswith(('however', 'therefore', 'moreover')):
|
| 1285 |
+
part2 = part2[0].upper() + part2[1:]
|
| 1286 |
+
|
| 1287 |
+
varied_sentences.append(part1)
|
| 1288 |
+
varied_sentences.append(part2)
|
| 1289 |
else:
|
| 1290 |
varied_sentences.append(sentence)
|
| 1291 |
else:
|
| 1292 |
+
# Add natural variations more often (35% chance) - BUT CONTEXTUALLY
|
| 1293 |
if i > 0 and random.random() < 0.35:
|
| 1294 |
+
# Check previous sentence ending to choose appropriate transition
|
| 1295 |
+
if varied_sentences and len(varied_sentences) > 0:
|
| 1296 |
+
prev_sent = varied_sentences[-1]
|
| 1297 |
+
|
| 1298 |
+
# Choose transition based on relationship
|
| 1299 |
+
if any(w in sentence.lower() for w in ['however', 'but', 'although', 'despite']):
|
| 1300 |
+
transition = random.choice(['However, ', 'On the other hand, ', 'That said, ', 'Nevertheless, '])
|
| 1301 |
+
elif any(w in sentence.lower() for w in ['example', 'instance', 'such as', 'like']):
|
| 1302 |
+
transition = random.choice(['For instance, ', 'For example, ', 'To illustrate, ', 'Consider this: '])
|
| 1303 |
+
elif any(w in prev_sent.lower() for w in ['first', 'second', 'finally', 'lastly']):
|
| 1304 |
+
transition = random.choice(['Next, ', 'Additionally, ', 'Furthermore, ', 'Also, '])
|
| 1305 |
+
else:
|
| 1306 |
+
transition = random.choice(['Furthermore, ', 'Additionally, ', 'Moreover, ', 'Also, '])
|
| 1307 |
+
|
| 1308 |
+
if sentence[0].isupper():
|
| 1309 |
+
sentence = transition + sentence[0].lower() + sentence[1:]
|
| 1310 |
|
| 1311 |
+
# Add mid-sentence interruptions (10% chance) - ONLY WHERE NATURAL
|
| 1312 |
+
if random.random() < 0.1 and len(words) > 12:
|
| 1313 |
+
# Find natural pause points (after commas, before "which", etc.)
|
| 1314 |
+
pause_points = []
|
| 1315 |
+
for idx, word in enumerate(words):
|
| 1316 |
+
if word.endswith(',') and idx > len(words)//4 and idx < 3*len(words)//4:
|
| 1317 |
+
pause_points.append(idx + 1)
|
| 1318 |
+
elif word.lower() in ['which', 'that', 'who', 'where'] and idx > len(words)//3:
|
| 1319 |
+
pause_points.append(idx)
|
| 1320 |
+
|
| 1321 |
+
if pause_points:
|
| 1322 |
+
pos = random.choice(pause_points)
|
| 1323 |
+
interruption = random.choice([
|
| 1324 |
+
" - and this is important - ",
|
| 1325 |
+
" - mind you - ",
|
| 1326 |
+
" - interestingly - ",
|
| 1327 |
+
" (worth noting) ",
|
| 1328 |
+
" - by the way - "
|
| 1329 |
+
])
|
| 1330 |
+
words.insert(pos, interruption)
|
| 1331 |
+
sentence = ' '.join(words)
|
| 1332 |
|
| 1333 |
varied_sentences.append(sentence)
|
| 1334 |
|
| 1335 |
# Post-process for additional human patterns
|
| 1336 |
result = ' '.join([s for s in varied_sentences if s])
|
| 1337 |
|
| 1338 |
+
# Add occasional fragments for human touch (5% chance) - ONLY AT APPROPRIATE PLACES
|
| 1339 |
+
if random.random() < 0.05 and len(varied_sentences) > 3:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1340 |
sentences = result.split('. ')
|
| 1341 |
+
# Add fragment after sentences that set up for it
|
| 1342 |
+
for idx, sent in enumerate(sentences[:-1]):
|
| 1343 |
+
if any(w in sent.lower() for w in ['amazing', 'incredible', 'surprising', 'interesting']):
|
| 1344 |
+
fragments = ["Truly remarkable.", "Quite something.", "Really makes you think."]
|
| 1345 |
+
sentences.insert(idx + 1, random.choice(fragments))
|
| 1346 |
+
break
|
| 1347 |
+
elif any(w in sent.lower() for w in ['difficult', 'challenging', 'complex', 'complicated']):
|
| 1348 |
+
fragments = ["Not easy, for sure.", "Tough stuff.", "Challenging indeed."]
|
| 1349 |
+
sentences.insert(idx + 1, random.choice(fragments))
|
| 1350 |
+
break
|
| 1351 |
+
|
| 1352 |
+
result = '. '.join(sentences)
|
| 1353 |
|
| 1354 |
return result
|
| 1355 |
|