Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -279,60 +279,48 @@ class HumanLikeVariations:
|
|
| 279 |
# Natural contractions throughout
|
| 280 |
sentence = self.apply_contractions(sentence)
|
| 281 |
|
| 282 |
-
# Add natural speech patterns (
|
| 283 |
-
if random.random() < 0.
|
| 284 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
words = sentence.split()
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
if random.random() < 0.7:
|
| 297 |
-
words.insert(pos, "you know,")
|
| 298 |
-
else:
|
| 299 |
-
words.insert(pos, "I mean,")
|
| 300 |
-
sentence = ' '.join(words)
|
| 301 |
|
| 302 |
-
#
|
| 303 |
-
if
|
| 304 |
-
error_applied = False
|
| 305 |
-
|
| 306 |
-
# Missing Oxford comma (most common and acceptable)
|
| 307 |
-
if not error_applied and ', and ' in sentence:
|
| 308 |
-
if random.random() < 0.6:
|
| 309 |
-
sentence = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', sentence, count=1)
|
| 310 |
-
error_applied = True
|
| 311 |
-
|
| 312 |
-
# Very occasional its/it's confusion (but only where it makes sense)
|
| 313 |
-
if not error_applied and random.random() < 0.3:
|
| 314 |
-
if " its " in sentence:
|
| 315 |
-
# Check if followed by a verb (where it's would be correct)
|
| 316 |
-
match = re.search(r'\bits\s+(\w+ing|\w+ed)\b', sentence)
|
| 317 |
-
if match:
|
| 318 |
-
sentence = sentence.replace(" its ", " it's ", 1)
|
| 319 |
-
error_applied = True
|
| 320 |
-
|
| 321 |
-
# Occasional missing article (but subtle)
|
| 322 |
-
if not error_applied and random.random() < 0.2:
|
| 323 |
-
# Only with "the" before certain nouns
|
| 324 |
-
if " the same " in sentence:
|
| 325 |
-
sentence = sentence.replace(" the same ", " same ", 1)
|
| 326 |
-
error_applied = True
|
| 327 |
-
|
| 328 |
-
# Natural sentence combinations (15% chance) - reduced
|
| 329 |
-
if i < len(sentences) - 1 and random.random() < 0.15:
|
| 330 |
next_sent = sentences[i+1].strip()
|
| 331 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 332 |
-
#
|
| 333 |
-
if any(next_sent.lower()
|
| 334 |
-
|
| 335 |
-
|
|
|
|
|
|
|
| 336 |
sentences[i+1] = "" # Mark as processed
|
| 337 |
|
| 338 |
result_sentences.append(sentence)
|
|
@@ -1133,43 +1121,39 @@ class EnhancedDipperHumanizer:
|
|
| 1133 |
|
| 1134 |
generated = generated.strip()
|
| 1135 |
|
| 1136 |
-
#
|
| 1137 |
words = generated.split()
|
| 1138 |
-
|
| 1139 |
-
|
| 1140 |
-
|
| 1141 |
-
|
| 1142 |
-
|
| 1143 |
-
|
| 1144 |
-
|
| 1145 |
-
|
| 1146 |
-
|
| 1147 |
-
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
|
| 1160 |
-
|
| 1161 |
-
|
| 1162 |
-
|
| 1163 |
-
|
| 1164 |
-
|
| 1165 |
-
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
|
| 1169 |
-
for pattern, replacement in awkward_patterns:
|
| 1170 |
-
generated = re.sub(pattern, replacement, generated, flags=re.IGNORECASE)
|
| 1171 |
-
|
| 1172 |
-
# Ensure proper ending punctuation
|
| 1173 |
if generated and generated[-1] not in '.!?:,;':
|
| 1174 |
# Check original ending
|
| 1175 |
orig_stripped = original.strip()
|
|
@@ -1182,25 +1166,20 @@ class EnhancedDipperHumanizer:
|
|
| 1182 |
else:
|
| 1183 |
generated += '.'
|
| 1184 |
elif orig_stripped.endswith('!'):
|
| 1185 |
-
generated
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1186 |
elif orig_stripped.endswith(':'):
|
| 1187 |
generated += ':'
|
| 1188 |
else:
|
| 1189 |
generated += '.'
|
| 1190 |
|
| 1191 |
-
#
|
| 1192 |
-
|
| 1193 |
-
|
| 1194 |
-
colon_index = generated.find(':')
|
| 1195 |
-
after_colon = generated[colon_index+1:].strip()
|
| 1196 |
-
# If what follows isn't a list or explanation, replace with semicolon or comma
|
| 1197 |
-
if after_colon and not any(after_colon.startswith(w) for w in ['the', 'a', 'an', '1.', '•', '-']):
|
| 1198 |
-
if 'they' in after_colon.lower()[:10]:
|
| 1199 |
-
# Like "have equal contact: they" -> "have equal contact; they"
|
| 1200 |
-
generated = generated.replace(':', ';', 1)
|
| 1201 |
-
|
| 1202 |
-
# Ensure first letter is capitalized
|
| 1203 |
-
if generated and generated[0].islower() and not self.is_likely_acronym_or_proper_noun(generated.split()[0]):
|
| 1204 |
generated = generated[0].upper() + generated[1:]
|
| 1205 |
|
| 1206 |
return generated
|
|
|
|
| 279 |
# Natural contractions throughout
|
| 280 |
sentence = self.apply_contractions(sentence)
|
| 281 |
|
| 282 |
+
# Add natural speech patterns (15% chance)
|
| 283 |
+
if random.random() < 0.15 and len(sentence.split()) > 10:
|
| 284 |
+
# Natural interruptions that humans actually use
|
| 285 |
+
if random.random() < 0.5:
|
| 286 |
+
# Add "you know" or "I mean" naturally
|
| 287 |
+
words = sentence.split()
|
| 288 |
+
if len(words) > 6:
|
| 289 |
+
pos = random.randint(3, len(words)-3)
|
| 290 |
+
if random.random() < 0.5:
|
| 291 |
+
words.insert(pos, "you know,")
|
| 292 |
+
else:
|
| 293 |
+
words.insert(pos, "I mean,")
|
| 294 |
+
sentence = ' '.join(words)
|
| 295 |
+
else:
|
| 296 |
+
# Start with natural opener
|
| 297 |
+
openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
|
| 298 |
+
sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
|
| 299 |
+
|
| 300 |
+
# Add subtle errors that humans make (8% chance)
|
| 301 |
+
if random.random() < 0.08:
|
| 302 |
words = sentence.split()
|
| 303 |
+
if len(words) > 5:
|
| 304 |
+
# Common comma omissions
|
| 305 |
+
if ", and" in sentence and random.random() < 0.3:
|
| 306 |
+
sentence = sentence.replace(", and", " and", 1)
|
| 307 |
+
# Double words occasionally
|
| 308 |
+
elif random.random() < 0.2:
|
| 309 |
+
idx = random.randint(1, len(words)-2)
|
| 310 |
+
if words[idx].lower() in ['the', 'a', 'to', 'in', 'on', 'at']:
|
| 311 |
+
words.insert(idx+1, words[idx])
|
| 312 |
+
sentence = ' '.join(words)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
+
# Natural sentence combinations (20% chance)
|
| 315 |
+
if i < len(sentences) - 1 and random.random() < 0.2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
next_sent = sentences[i+1].strip()
|
| 317 |
if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
|
| 318 |
+
# Natural connectors based on content
|
| 319 |
+
if any(w in next_sent.lower() for w in ['but', 'however', 'although']):
|
| 320 |
+
sentence = sentence.rstrip('.') + ", but " + next_sent[0].lower() + next_sent[1:]
|
| 321 |
+
sentences[i+1] = "" # Mark as processed
|
| 322 |
+
elif any(w in next_sent.lower() for w in ['also', 'too', 'as well']):
|
| 323 |
+
sentence = sentence.rstrip('.') + " and " + next_sent[0].lower() + next_sent[1:]
|
| 324 |
sentences[i+1] = "" # Mark as processed
|
| 325 |
|
| 326 |
result_sentences.append(sentence)
|
|
|
|
| 1121 |
|
| 1122 |
generated = generated.strip()
|
| 1123 |
|
| 1124 |
+
# Check if the sentence seems complete semantically
|
| 1125 |
words = generated.split()
|
| 1126 |
+
if len(words) >= 3:
|
| 1127 |
+
# Check if last word is a good ending word
|
| 1128 |
+
last_word = words[-1].lower().rstrip('.,!?;:')
|
| 1129 |
+
|
| 1130 |
+
# Common ending words that might not need punctuation fix
|
| 1131 |
+
ending_words = {
|
| 1132 |
+
'too', 'also', 'well', 'though', 'however',
|
| 1133 |
+
'furthermore', 'moreover', 'indeed', 'anyway',
|
| 1134 |
+
'regardless', 'nonetheless', 'therefore', 'thus'
|
| 1135 |
+
}
|
| 1136 |
+
|
| 1137 |
+
# If it ends with a good word, just add appropriate punctuation
|
| 1138 |
+
if last_word in ending_words:
|
| 1139 |
+
if generated[-1] not in '.!?':
|
| 1140 |
+
generated += '.'
|
| 1141 |
+
return generated
|
| 1142 |
+
|
| 1143 |
+
# Check for cut-off patterns
|
| 1144 |
+
if len(words) > 0:
|
| 1145 |
+
last_word = words[-1]
|
| 1146 |
+
|
| 1147 |
+
# Remove if it's clearly cut off (1-2 chars, no vowels)
|
| 1148 |
+
# But don't remove valid short words like "is", "of", "to", etc.
|
| 1149 |
+
short_valid_words = {'is', 'of', 'to', 'in', 'on', 'at', 'by', 'or', 'if', 'so', 'up', 'no', 'we', 'he', 'me', 'be', 'do', 'go'}
|
| 1150 |
+
if (len(last_word) <= 2 and
|
| 1151 |
+
last_word.lower() not in short_valid_words and
|
| 1152 |
+
not any(c in 'aeiouAEIOU' for c in last_word)):
|
| 1153 |
+
words = words[:-1]
|
| 1154 |
+
generated = ' '.join(words)
|
| 1155 |
+
|
| 1156 |
+
# Add ending punctuation based on context
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1157 |
if generated and generated[-1] not in '.!?:,;':
|
| 1158 |
# Check original ending
|
| 1159 |
orig_stripped = original.strip()
|
|
|
|
| 1166 |
else:
|
| 1167 |
generated += '.'
|
| 1168 |
elif orig_stripped.endswith('!'):
|
| 1169 |
+
# Check if generated seems exclamatory
|
| 1170 |
+
exclaim_words = ['amazing', 'incredible', 'fantastic', 'terrible', 'awful', 'wonderful', 'excellent']
|
| 1171 |
+
if any(word in generated.lower() for word in exclaim_words):
|
| 1172 |
+
generated += '!'
|
| 1173 |
+
else:
|
| 1174 |
+
generated += '.'
|
| 1175 |
elif orig_stripped.endswith(':'):
|
| 1176 |
generated += ':'
|
| 1177 |
else:
|
| 1178 |
generated += '.'
|
| 1179 |
|
| 1180 |
+
# Ensure first letter is capitalized ONLY if it's sentence start
|
| 1181 |
+
# Don't capitalize words like "iPhone" or "eBay" or placeholders
|
| 1182 |
+
if generated and generated[0].islower() and not self.is_likely_acronym_or_proper_noun(generated.split()[0]) and not generated.startswith('__KW'):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1183 |
generated = generated[0].upper() + generated[1:]
|
| 1184 |
|
| 1185 |
return generated
|