Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -279,48 +279,60 @@ class HumanLikeVariations:
|
|
| 279 |
# Natural contractions throughout
|
| 280 |
sentence = self.apply_contractions(sentence)
|
| 281 |
|
| 282 |
-
# Add natural speech patterns (
|
| 283 |
-
if random.random() < 0.
|
| 284 |
-
#
|
| 285 |
-
if random.random() < 0.5:
|
| 286 |
-
# Add "you know" or "I mean" naturally
|
| 287 |
-
words = sentence.split()
|
| 288 |
-
if len(words) > 6:
|
| 289 |
-
pos = random.randint(3, len(words)-3)
|
| 290 |
-
if random.random() < 0.5:
|
| 291 |
-
words.insert(pos, "you know,")
|
| 292 |
-
else:
|
| 293 |
-
words.insert(pos, "I mean,")
|
| 294 |
-
sentence = ' '.join(words)
|
| 295 |
-
else:
|
| 296 |
-
# Start with natural opener
|
| 297 |
-
openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
|
| 298 |
-
sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
|
| 299 |
-
|
| 300 |
-
# Add subtle errors that humans make (8% chance)
|
| 301 |
-
if random.random() < 0.08:
|
| 302 |
words = sentence.split()
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
-
#
|
| 315 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
next_sent = sentences[i+1].strip()
|
| 317 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 318 |
-
#
|
| 319 |
-
if any(
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
elif any(w in next_sent.lower() for w in ['also', 'too', 'as well']):
|
| 323 |
-
sentence = sentence.rstrip('.') + " and " + next_sent[0].lower() + next_sent[1:]
|
| 324 |
sentences[i+1] = "" # Mark as processed
|
| 325 |
|
| 326 |
result_sentences.append(sentence)
|
|
@@ -1121,39 +1133,43 @@ class EnhancedDipperHumanizer:
|
|
| 1121 |
|
| 1122 |
generated = generated.strip()
|
| 1123 |
|
| 1124 |
-
#
|
| 1125 |
words = generated.split()
|
| 1126 |
-
|
| 1127 |
-
|
| 1128 |
-
|
| 1129 |
-
|
| 1130 |
-
|
| 1131 |
-
|
| 1132 |
-
|
| 1133 |
-
|
| 1134 |
-
|
| 1135 |
-
|
| 1136 |
-
|
| 1137 |
-
|
| 1138 |
-
|
| 1139 |
-
|
| 1140 |
-
|
| 1141 |
-
|
| 1142 |
-
|
| 1143 |
-
|
| 1144 |
-
|
| 1145 |
-
|
| 1146 |
-
|
| 1147 |
-
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1157 |
if generated and generated[-1] not in '.!?:,;':
|
| 1158 |
# Check original ending
|
| 1159 |
orig_stripped = original.strip()
|
|
@@ -1166,20 +1182,25 @@ class EnhancedDipperHumanizer:
|
|
| 1166 |
else:
|
| 1167 |
generated += '.'
|
| 1168 |
elif orig_stripped.endswith('!'):
|
| 1169 |
-
|
| 1170 |
-
exclaim_words = ['amazing', 'incredible', 'fantastic', 'terrible', 'awful', 'wonderful', 'excellent']
|
| 1171 |
-
if any(word in generated.lower() for word in exclaim_words):
|
| 1172 |
-
generated += '!'
|
| 1173 |
-
else:
|
| 1174 |
-
generated += '.'
|
| 1175 |
elif orig_stripped.endswith(':'):
|
| 1176 |
generated += ':'
|
| 1177 |
else:
|
| 1178 |
generated += '.'
|
| 1179 |
|
| 1180 |
-
#
|
| 1181 |
-
|
| 1182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1183 |
generated = generated[0].upper() + generated[1:]
|
| 1184 |
|
| 1185 |
return generated
|
|
|
|
| 279 |
# Natural contractions throughout
|
| 280 |
sentence = self.apply_contractions(sentence)
|
| 281 |
|
| 282 |
+
# Add natural speech patterns (12% chance) - reduced and more selective
|
| 283 |
+
if random.random() < 0.12 and len(sentence.split()) > 12:
|
| 284 |
+
# Only add where it sounds natural
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
words = sentence.split()
|
| 286 |
+
|
| 287 |
+
# Find natural positions (next to an existing comma, away from the sentence edges)
|
| 288 |
+
natural_positions = []
|
| 289 |
+
for idx, word in enumerate(words):
|
| 290 |
+
if idx > 3 and idx < len(words) - 3:
|
| 291 |
+
if word.endswith(',') or words[idx-1].endswith(','):
|
| 292 |
+
natural_positions.append(idx)
|
| 293 |
+
|
| 294 |
+
if natural_positions and random.random() < 0.5:
|
| 295 |
+
pos = random.choice(natural_positions)
|
| 296 |
+
if random.random() < 0.7:
|
| 297 |
+
words.insert(pos, "you know,")
|
| 298 |
+
else:
|
| 299 |
+
words.insert(pos, "I mean,")
|
| 300 |
+
sentence = ' '.join(words)
|
| 301 |
|
| 302 |
+
# Add READABLE human errors (5% chance) - reduced
|
| 303 |
+
if random.random() < 0.05 and len(sentence.split()) > 10:
|
| 304 |
+
error_applied = False
|
| 305 |
+
|
| 306 |
+
# Missing Oxford comma (most common and acceptable)
|
| 307 |
+
if not error_applied and ', and ' in sentence:
|
| 308 |
+
if random.random() < 0.6:
|
| 309 |
+
sentence = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', sentence, count=1)
|
| 310 |
+
error_applied = True
|
| 311 |
+
|
| 312 |
+
# Very occasional its/it's confusion (but only where it makes sense)
|
| 313 |
+
if not error_applied and random.random() < 0.3:
|
| 314 |
+
if " its " in sentence:
|
| 315 |
+
# Check if followed by a verb (where it's would be correct)
|
| 316 |
+
match = re.search(r'\bits\s+(\w+ing|\w+ed)\b', sentence)
|
| 317 |
+
if match:
|
| 318 |
+
sentence = sentence.replace(" its ", " it's ", 1)
|
| 319 |
+
error_applied = True
|
| 320 |
+
|
| 321 |
+
# Occasional missing article (but subtle)
|
| 322 |
+
if not error_applied and random.random() < 0.2:
|
| 323 |
+
# Only with "the" before certain nouns
|
| 324 |
+
if " the same " in sentence:
|
| 325 |
+
sentence = sentence.replace(" the same ", " same ", 1)
|
| 326 |
+
error_applied = True
|
| 327 |
+
|
| 328 |
+
# Natural sentence combinations (15% chance) - reduced
|
| 329 |
+
if i < len(sentences) - 1 and random.random() < 0.15:
|
| 330 |
next_sent = sentences[i+1].strip()
|
| 331 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 332 |
+
# Only combine when the next sentence opens with a referring word (this/that/it/which)
|
| 333 |
+
if any(next_sent.lower().startswith(w) for w in ['this', 'that', 'it', 'which']):
|
| 334 |
+
# Natural combination
|
| 335 |
+
sentence = sentence.rstrip('.') + ", " + next_sent[0].lower() + next_sent[1:]
|
|
|
|
|
|
|
| 336 |
sentences[i+1] = "" # Mark as processed
|
| 337 |
|
| 338 |
result_sentences.append(sentence)
|
|
|
|
| 1133 |
|
| 1134 |
generated = generated.strip()
|
| 1135 |
|
| 1136 |
+
# Fix fragments and incomplete thoughts
|
| 1137 |
words = generated.split()
|
| 1138 |
+
|
| 1139 |
+
# Check for sentence fragments (fewer than 3 words)
|
| 1140 |
+
if len(words) < 3:
|
| 1141 |
+
# Expand a two-word question; otherwise fall back to the original text
|
| 1142 |
+
if len(words) == 2 and words[1].endswith('?'):
|
| 1143 |
+
# Like "Some news?" - expand it
|
| 1144 |
+
generated = "Here's some " + words[0].lower() + " " + words[1]
|
| 1145 |
+
else:
|
| 1146 |
+
# Too short, return original
|
| 1147 |
+
return original
|
| 1148 |
+
|
| 1149 |
+
# Fix missing verbs or awkward constructions
|
| 1150 |
+
# Check for patterns like "that incomparably less" (missing "are")
|
| 1151 |
+
if ' that ' in generated:
|
| 1152 |
+
that_index = generated.find(' that ')
|
| 1153 |
+
after_that = generated[that_index+5:].split()
|
| 1154 |
+
if len(after_that) > 0 and after_that[0] in ['incomparably', 'incredibly', 'remarkably', 'significantly']:
|
| 1155 |
+
# Likely missing a verb
|
| 1156 |
+
if len(after_that) > 1 and after_that[1] in ['less', 'more', 'better', 'worse']:
|
| 1157 |
+
# Insert "are"
|
| 1158 |
+
generated = generated[:that_index+5] + "are " + generated[that_index+5:]
|
| 1159 |
+
|
| 1160 |
+
# Fix awkward prepositional phrases
|
| 1161 |
+
# "tuition fees USA for Indians" -> "tuition fees in the USA for Indians"
|
| 1162 |
+
awkward_patterns = [
|
| 1163 |
+
(r'\bfees\s+USA\b', 'fees in the USA'),
|
| 1164 |
+
(r'\bfees\s+US\b', 'fees in the US'),
|
| 1165 |
+
(r'\bstudies\s+USA\b', 'studies in the USA'),
|
| 1166 |
+
(r'\bcost\s+USA\b', 'cost in the USA'),
|
| 1167 |
+
]
|
| 1168 |
+
|
| 1169 |
+
for pattern, replacement in awkward_patterns:
|
| 1170 |
+
generated = re.sub(pattern, replacement, generated, flags=re.IGNORECASE)
|
| 1171 |
+
|
| 1172 |
+
# Ensure proper ending punctuation
|
| 1173 |
if generated and generated[-1] not in '.!?:,;':
|
| 1174 |
# Check original ending
|
| 1175 |
orig_stripped = original.strip()
|
|
|
|
| 1182 |
else:
|
| 1183 |
generated += '.'
|
| 1184 |
elif orig_stripped.endswith('!'):
|
| 1185 |
+
generated += '!'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1186 |
elif orig_stripped.endswith(':'):
|
| 1187 |
generated += ':'
|
| 1188 |
else:
|
| 1189 |
generated += '.'
|
| 1190 |
|
| 1191 |
+
# Fix awkward colons in the middle of sentences
|
| 1192 |
+
if ':' in generated and not generated.endswith(':'):
|
| 1193 |
+
# Check if it's a list introduction (which is fine)
|
| 1194 |
+
colon_index = generated.find(':')
|
| 1195 |
+
after_colon = generated[colon_index+1:].strip()
|
| 1196 |
+
# If what follows doesn't look like a list or explanation and "they" appears right after, replace the colon with a semicolon
|
| 1197 |
+
if after_colon and not any(after_colon.startswith(w) for w in ['the', 'a', 'an', '1.', '•', '-']):
|
| 1198 |
+
if 'they' in after_colon.lower()[:10]:
|
| 1199 |
+
# Like "have equal contact: they" -> "have equal contact; they"
|
| 1200 |
+
generated = generated.replace(':', ';', 1)
|
| 1201 |
+
|
| 1202 |
+
# Ensure first letter is capitalized
|
| 1203 |
+
if generated and generated[0].islower() and not self.is_likely_acronym_or_proper_noun(generated.split()[0]):
|
| 1204 |
generated = generated[0].upper() + generated[1:]
|
| 1205 |
|
| 1206 |
return generated
|