EdysorEdutech committed on
Commit
4633815
·
verified ·
1 Parent(s): 94251cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -79
app.py CHANGED
@@ -279,48 +279,60 @@ class HumanLikeVariations:
279
  # Natural contractions throughout
280
  sentence = self.apply_contractions(sentence)
281
 
282
- # Add natural speech patterns (15% chance)
283
- if random.random() < 0.15 and len(sentence.split()) > 10:
284
- # Natural interruptions that humans actually use
285
- if random.random() < 0.5:
286
- # Add "you know" or "I mean" naturally
287
- words = sentence.split()
288
- if len(words) > 6:
289
- pos = random.randint(3, len(words)-3)
290
- if random.random() < 0.5:
291
- words.insert(pos, "you know,")
292
- else:
293
- words.insert(pos, "I mean,")
294
- sentence = ' '.join(words)
295
- else:
296
- # Start with natural opener
297
- openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
298
- sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
299
-
300
- # Add subtle errors that humans make (8% chance)
301
- if random.random() < 0.08:
302
  words = sentence.split()
303
- if len(words) > 5:
304
- # Common comma omissions
305
- if ", and" in sentence and random.random() < 0.3:
306
- sentence = sentence.replace(", and", " and", 1)
307
- # Double words occasionally
308
- elif random.random() < 0.2:
309
- idx = random.randint(1, len(words)-2)
310
- if words[idx].lower() in ['the', 'a', 'to', 'in', 'on', 'at']:
311
- words.insert(idx+1, words[idx])
312
- sentence = ' '.join(words)
 
 
 
 
 
313
 
314
- # Natural sentence combinations (20% chance)
315
- if i < len(sentences) - 1 and random.random() < 0.2:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
  next_sent = sentences[i+1].strip()
317
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
318
- # Natural connectors based on content
319
- if any(w in next_sent.lower() for w in ['but', 'however', 'although']):
320
- sentence = sentence.rstrip('.') + ", but " + next_sent[0].lower() + next_sent[1:]
321
- sentences[i+1] = "" # Mark as processed
322
- elif any(w in next_sent.lower() for w in ['also', 'too', 'as well']):
323
- sentence = sentence.rstrip('.') + " and " + next_sent[0].lower() + next_sent[1:]
324
  sentences[i+1] = "" # Mark as processed
325
 
326
  result_sentences.append(sentence)
@@ -1121,39 +1133,43 @@ class EnhancedDipperHumanizer:
1121
 
1122
  generated = generated.strip()
1123
 
1124
- # Check if the sentence seems complete semantically
1125
  words = generated.split()
1126
- if len(words) >= 3:
1127
- # Check if last word is a good ending word
1128
- last_word = words[-1].lower().rstrip('.,!?;:')
1129
-
1130
- # Common ending words that might not need punctuation fix
1131
- ending_words = {
1132
- 'too', 'also', 'well', 'though', 'however',
1133
- 'furthermore', 'moreover', 'indeed', 'anyway',
1134
- 'regardless', 'nonetheless', 'therefore', 'thus'
1135
- }
1136
-
1137
- # If it ends with a good word, just add appropriate punctuation
1138
- if last_word in ending_words:
1139
- if generated[-1] not in '.!?':
1140
- generated += '.'
1141
- return generated
1142
-
1143
- # Check for cut-off patterns
1144
- if len(words) > 0:
1145
- last_word = words[-1]
1146
-
1147
- # Remove if it's clearly cut off (1-2 chars, no vowels)
1148
- # But don't remove valid short words like "is", "of", "to", etc.
1149
- short_valid_words = {'is', 'of', 'to', 'in', 'on', 'at', 'by', 'or', 'if', 'so', 'up', 'no', 'we', 'he', 'me', 'be', 'do', 'go'}
1150
- if (len(last_word) <= 2 and
1151
- last_word.lower() not in short_valid_words and
1152
- not any(c in 'aeiouAEIOU' for c in last_word)):
1153
- words = words[:-1]
1154
- generated = ' '.join(words)
1155
-
1156
- # Add ending punctuation based on context
 
 
 
 
1157
  if generated and generated[-1] not in '.!?:,;':
1158
  # Check original ending
1159
  orig_stripped = original.strip()
@@ -1166,20 +1182,25 @@ class EnhancedDipperHumanizer:
1166
  else:
1167
  generated += '.'
1168
  elif orig_stripped.endswith('!'):
1169
- # Check if generated seems exclamatory
1170
- exclaim_words = ['amazing', 'incredible', 'fantastic', 'terrible', 'awful', 'wonderful', 'excellent']
1171
- if any(word in generated.lower() for word in exclaim_words):
1172
- generated += '!'
1173
- else:
1174
- generated += '.'
1175
  elif orig_stripped.endswith(':'):
1176
  generated += ':'
1177
  else:
1178
  generated += '.'
1179
 
1180
- # Ensure first letter is capitalized ONLY if it's sentence start
1181
- # Don't capitalize words like "iPhone" or "eBay" or placeholders
1182
- if generated and generated[0].islower() and not self.is_likely_acronym_or_proper_noun(generated.split()[0]) and not generated.startswith('__KW'):
 
 
 
 
 
 
 
 
 
 
1183
  generated = generated[0].upper() + generated[1:]
1184
 
1185
  return generated
 
279
  # Natural contractions throughout
280
  sentence = self.apply_contractions(sentence)
281
 
282
+ # Add natural speech patterns (12% chance) - reduced and more selective
283
+ if random.random() < 0.12 and len(sentence.split()) > 12:
284
+ # Only add where it sounds natural
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  words = sentence.split()
286
+
287
+ # Find natural positions (after commas, before explanations)
288
+ natural_positions = []
289
+ for idx, word in enumerate(words):
290
+ if idx > 3 and idx < len(words) - 3:
291
+ if word.endswith(',') or words[idx-1].endswith(','):
292
+ natural_positions.append(idx)
293
+
294
+ if natural_positions and random.random() < 0.5:
295
+ pos = random.choice(natural_positions)
296
+ if random.random() < 0.7:
297
+ words.insert(pos, "you know,")
298
+ else:
299
+ words.insert(pos, "I mean,")
300
+ sentence = ' '.join(words)
301
 
302
+ # Add READABLE human errors (5% chance) - reduced
303
+ if random.random() < 0.05 and len(sentence.split()) > 10:
304
+ error_applied = False
305
+
306
+ # Missing Oxford comma (most common and acceptable)
307
+ if not error_applied and ', and ' in sentence:
308
+ if random.random() < 0.6:
309
+ sentence = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', sentence, count=1)
310
+ error_applied = True
311
+
312
+ # Very occasional its/it's confusion (but only where it makes sense)
313
+ if not error_applied and random.random() < 0.3:
314
+ if " its " in sentence:
315
+ # Check if followed by a verb (where it's would be correct)
316
+ match = re.search(r'\bits\s+(\w+ing|\w+ed)\b', sentence)
317
+ if match:
318
+ sentence = sentence.replace(" its ", " it's ", 1)
319
+ error_applied = True
320
+
321
+ # Occasional missing article (but subtle)
322
+ if not error_applied and random.random() < 0.2:
323
+ # Only with "the" before certain nouns
324
+ if " the same " in sentence:
325
+ sentence = sentence.replace(" the same ", " same ", 1)
326
+ error_applied = True
327
+
328
+ # Natural sentence combinations (15% chance) - reduced
329
+ if i < len(sentences) - 1 and random.random() < 0.15:
330
  next_sent = sentences[i+1].strip()
331
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
332
+ # Only combine if semantically related
333
+ if any(next_sent.lower().startswith(w) for w in ['this', 'that', 'it', 'which']):
334
+ # Natural combination
335
+ sentence = sentence.rstrip('.') + ", " + next_sent[0].lower() + next_sent[1:]
 
 
336
  sentences[i+1] = "" # Mark as processed
337
 
338
  result_sentences.append(sentence)
 
1133
 
1134
  generated = generated.strip()
1135
 
1136
+ # Fix fragments and incomplete thoughts
1137
  words = generated.split()
1138
+
1139
+ # Check for sentence fragments (less than 3 words or no verb)
1140
+ if len(words) < 3:
1141
+ # Try to merge with previous context or expand
1142
+ if len(words) == 2 and words[1].endswith('?'):
1143
+ # Like "Some news?" - expand it
1144
+ generated = "Here's some " + words[0].lower() + " " + words[1]
1145
+ else:
1146
+ # Too short, return original
1147
+ return original
1148
+
1149
+ # Fix missing verbs or awkward constructions
1150
+ # Check for patterns like "that incomparably less" (missing "are")
1151
+ if ' that ' in generated:
1152
+ that_index = generated.find(' that ')
1153
+ after_that = generated[that_index+5:].split()
1154
+ if len(after_that) > 0 and after_that[0] in ['incomparably', 'incredibly', 'remarkably', 'significantly']:
1155
+ # Likely missing a verb
1156
+ if len(after_that) > 1 and after_that[1] in ['less', 'more', 'better', 'worse']:
1157
+ # Insert "are"
1158
+ generated = generated[:that_index+5] + "are " + generated[that_index+5:]
1159
+
1160
+ # Fix awkward prepositional phrases
1161
+ # "tuition fees USA for Indians" -> "tuition fees in the USA for Indians"
1162
+ awkward_patterns = [
1163
+ (r'\bfees\s+USA\b', 'fees in the USA'),
1164
+ (r'\bfees\s+US\b', 'fees in the US'),
1165
+ (r'\bstudies\s+USA\b', 'studies in the USA'),
1166
+ (r'\bcost\s+USA\b', 'cost in the USA'),
1167
+ ]
1168
+
1169
+ for pattern, replacement in awkward_patterns:
1170
+ generated = re.sub(pattern, replacement, generated, flags=re.IGNORECASE)
1171
+
1172
+ # Ensure proper ending punctuation
1173
  if generated and generated[-1] not in '.!?:,;':
1174
  # Check original ending
1175
  orig_stripped = original.strip()
 
1182
  else:
1183
  generated += '.'
1184
  elif orig_stripped.endswith('!'):
1185
+ generated += '!'
 
 
 
 
 
1186
  elif orig_stripped.endswith(':'):
1187
  generated += ':'
1188
  else:
1189
  generated += '.'
1190
 
1191
+ # Fix awkward colons in the middle of sentences
1192
+ if ':' in generated and not generated.endswith(':'):
1193
+ # Check if it's a list introduction (which is fine)
1194
+ colon_index = generated.find(':')
1195
+ after_colon = generated[colon_index+1:].strip()
1196
+ # If what follows isn't a list or explanation, replace with semicolon or comma
1197
+ if after_colon and not any(after_colon.startswith(w) for w in ['the', 'a', 'an', '1.', '•', '-']):
1198
+ if 'they' in after_colon.lower()[:10]:
1199
+ # Like "have equal contact: they" -> "have equal contact; they"
1200
+ generated = generated.replace(':', ';', 1)
1201
+
1202
+ # Ensure first letter is capitalized
1203
+ if generated and generated[0].islower() and not self.is_likely_acronym_or_proper_noun(generated.split()[0]):
1204
  generated = generated[0].upper() + generated[1:]
1205
 
1206
  return generated