EdysorEdutech committed on
Commit
2f2dc26
·
verified ·
1 Parent(s): 4633815

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -100
app.py CHANGED
@@ -279,60 +279,48 @@ class HumanLikeVariations:
279
  # Natural contractions throughout
280
  sentence = self.apply_contractions(sentence)
281
 
282
- # Add natural speech patterns (12% chance) - reduced and more selective
283
- if random.random() < 0.12 and len(sentence.split()) > 12:
284
- # Only add where it sounds natural
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  words = sentence.split()
286
-
287
- # Find natural positions (after commas, before explanations)
288
- natural_positions = []
289
- for idx, word in enumerate(words):
290
- if idx > 3 and idx < len(words) - 3:
291
- if word.endswith(',') or words[idx-1].endswith(','):
292
- natural_positions.append(idx)
293
-
294
- if natural_positions and random.random() < 0.5:
295
- pos = random.choice(natural_positions)
296
- if random.random() < 0.7:
297
- words.insert(pos, "you know,")
298
- else:
299
- words.insert(pos, "I mean,")
300
- sentence = ' '.join(words)
301
 
302
- # Add READABLE human errors (5% chance) - reduced
303
- if random.random() < 0.05 and len(sentence.split()) > 10:
304
- error_applied = False
305
-
306
- # Missing Oxford comma (most common and acceptable)
307
- if not error_applied and ', and ' in sentence:
308
- if random.random() < 0.6:
309
- sentence = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', sentence, count=1)
310
- error_applied = True
311
-
312
- # Very occasional its/it's confusion (but only where it makes sense)
313
- if not error_applied and random.random() < 0.3:
314
- if " its " in sentence:
315
- # Check if followed by a verb (where it's would be correct)
316
- match = re.search(r'\bits\s+(\w+ing|\w+ed)\b', sentence)
317
- if match:
318
- sentence = sentence.replace(" its ", " it's ", 1)
319
- error_applied = True
320
-
321
- # Occasional missing article (but subtle)
322
- if not error_applied and random.random() < 0.2:
323
- # Only with "the" before certain nouns
324
- if " the same " in sentence:
325
- sentence = sentence.replace(" the same ", " same ", 1)
326
- error_applied = True
327
-
328
- # Natural sentence combinations (15% chance) - reduced
329
- if i < len(sentences) - 1 and random.random() < 0.15:
330
  next_sent = sentences[i+1].strip()
331
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
332
- # Only combine if semantically related
333
- if any(next_sent.lower().startswith(w) for w in ['this', 'that', 'it', 'which']):
334
- # Natural combination
335
- sentence = sentence.rstrip('.') + ", " + next_sent[0].lower() + next_sent[1:]
 
 
336
  sentences[i+1] = "" # Mark as processed
337
 
338
  result_sentences.append(sentence)
@@ -1133,43 +1121,39 @@ class EnhancedDipperHumanizer:
1133
 
1134
  generated = generated.strip()
1135
 
1136
- # Fix fragments and incomplete thoughts
1137
  words = generated.split()
1138
-
1139
- # Check for sentence fragments (less than 3 words or no verb)
1140
- if len(words) < 3:
1141
- # Try to merge with previous context or expand
1142
- if len(words) == 2 and words[1].endswith('?'):
1143
- # Like "Some news?" - expand it
1144
- generated = "Here's some " + words[0].lower() + " " + words[1]
1145
- else:
1146
- # Too short, return original
1147
- return original
1148
-
1149
- # Fix missing verbs or awkward constructions
1150
- # Check for patterns like "that incomparably less" (missing "are")
1151
- if ' that ' in generated:
1152
- that_index = generated.find(' that ')
1153
- after_that = generated[that_index+5:].split()
1154
- if len(after_that) > 0 and after_that[0] in ['incomparably', 'incredibly', 'remarkably', 'significantly']:
1155
- # Likely missing a verb
1156
- if len(after_that) > 1 and after_that[1] in ['less', 'more', 'better', 'worse']:
1157
- # Insert "are"
1158
- generated = generated[:that_index+5] + "are " + generated[that_index+5:]
1159
-
1160
- # Fix awkward prepositional phrases
1161
- # "tuition fees USA for Indians" -> "tuition fees in the USA for Indians"
1162
- awkward_patterns = [
1163
- (r'\bfees\s+USA\b', 'fees in the USA'),
1164
- (r'\bfees\s+US\b', 'fees in the US'),
1165
- (r'\bstudies\s+USA\b', 'studies in the USA'),
1166
- (r'\bcost\s+USA\b', 'cost in the USA'),
1167
- ]
1168
-
1169
- for pattern, replacement in awkward_patterns:
1170
- generated = re.sub(pattern, replacement, generated, flags=re.IGNORECASE)
1171
-
1172
- # Ensure proper ending punctuation
1173
  if generated and generated[-1] not in '.!?:,;':
1174
  # Check original ending
1175
  orig_stripped = original.strip()
@@ -1182,25 +1166,20 @@ class EnhancedDipperHumanizer:
1182
  else:
1183
  generated += '.'
1184
  elif orig_stripped.endswith('!'):
1185
- generated += '!'
 
 
 
 
 
1186
  elif orig_stripped.endswith(':'):
1187
  generated += ':'
1188
  else:
1189
  generated += '.'
1190
 
1191
- # Fix awkward colons in the middle of sentences
1192
- if ':' in generated and not generated.endswith(':'):
1193
- # Check if it's a list introduction (which is fine)
1194
- colon_index = generated.find(':')
1195
- after_colon = generated[colon_index+1:].strip()
1196
- # If what follows isn't a list or explanation, replace with semicolon or comma
1197
- if after_colon and not any(after_colon.startswith(w) for w in ['the', 'a', 'an', '1.', '•', '-']):
1198
- if 'they' in after_colon.lower()[:10]:
1199
- # Like "have equal contact: they" -> "have equal contact; they"
1200
- generated = generated.replace(':', ';', 1)
1201
-
1202
- # Ensure first letter is capitalized
1203
- if generated and generated[0].islower() and not self.is_likely_acronym_or_proper_noun(generated.split()[0]):
1204
  generated = generated[0].upper() + generated[1:]
1205
 
1206
  return generated
 
279
  # Natural contractions throughout
280
  sentence = self.apply_contractions(sentence)
281
 
282
+ # Add natural speech patterns (15% chance)
283
+ if random.random() < 0.15 and len(sentence.split()) > 10:
284
+ # Natural interruptions that humans actually use
285
+ if random.random() < 0.5:
286
+ # Add "you know" or "I mean" naturally
287
+ words = sentence.split()
288
+ if len(words) > 6:
289
+ pos = random.randint(3, len(words)-3)
290
+ if random.random() < 0.5:
291
+ words.insert(pos, "you know,")
292
+ else:
293
+ words.insert(pos, "I mean,")
294
+ sentence = ' '.join(words)
295
+ else:
296
+ # Start with natural opener
297
+ openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
298
+ sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
299
+
300
+ # Add subtle errors that humans make (8% chance)
301
+ if random.random() < 0.08:
302
  words = sentence.split()
303
+ if len(words) > 5:
304
+ # Common comma omissions
305
+ if ", and" in sentence and random.random() < 0.3:
306
+ sentence = sentence.replace(", and", " and", 1)
307
+ # Double words occasionally
308
+ elif random.random() < 0.2:
309
+ idx = random.randint(1, len(words)-2)
310
+ if words[idx].lower() in ['the', 'a', 'to', 'in', 'on', 'at']:
311
+ words.insert(idx+1, words[idx])
312
+ sentence = ' '.join(words)
 
 
 
 
 
313
 
314
+ # Natural sentence combinations (20% chance)
315
+ if i < len(sentences) - 1 and random.random() < 0.2:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
  next_sent = sentences[i+1].strip()
317
  if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
318
+ # Natural connectors based on content
319
+ if any(w in next_sent.lower() for w in ['but', 'however', 'although']):
320
+ sentence = sentence.rstrip('.') + ", but " + next_sent[0].lower() + next_sent[1:]
321
+ sentences[i+1] = "" # Mark as processed
322
+ elif any(w in next_sent.lower() for w in ['also', 'too', 'as well']):
323
+ sentence = sentence.rstrip('.') + " and " + next_sent[0].lower() + next_sent[1:]
324
  sentences[i+1] = "" # Mark as processed
325
 
326
  result_sentences.append(sentence)
 
1121
 
1122
  generated = generated.strip()
1123
 
1124
+ # Check if the sentence seems complete semantically
1125
  words = generated.split()
1126
+ if len(words) >= 3:
1127
+ # Check if last word is a good ending word
1128
+ last_word = words[-1].lower().rstrip('.,!?;:')
1129
+
1130
+ # Common ending words that might not need punctuation fix
1131
+ ending_words = {
1132
+ 'too', 'also', 'well', 'though', 'however',
1133
+ 'furthermore', 'moreover', 'indeed', 'anyway',
1134
+ 'regardless', 'nonetheless', 'therefore', 'thus'
1135
+ }
1136
+
1137
+ # If it ends with a good word, just add appropriate punctuation
1138
+ if last_word in ending_words:
1139
+ if generated[-1] not in '.!?':
1140
+ generated += '.'
1141
+ return generated
1142
+
1143
+ # Check for cut-off patterns
1144
+ if len(words) > 0:
1145
+ last_word = words[-1]
1146
+
1147
+ # Remove if it's clearly cut off (1-2 chars, no vowels)
1148
+ # But don't remove valid short words like "is", "of", "to", etc.
1149
+ short_valid_words = {'is', 'of', 'to', 'in', 'on', 'at', 'by', 'or', 'if', 'so', 'up', 'no', 'we', 'he', 'me', 'be', 'do', 'go'}
1150
+ if (len(last_word) <= 2 and
1151
+ last_word.lower() not in short_valid_words and
1152
+ not any(c in 'aeiouAEIOU' for c in last_word)):
1153
+ words = words[:-1]
1154
+ generated = ' '.join(words)
1155
+
1156
+ # Add ending punctuation based on context
 
 
 
 
1157
  if generated and generated[-1] not in '.!?:,;':
1158
  # Check original ending
1159
  orig_stripped = original.strip()
 
1166
  else:
1167
  generated += '.'
1168
  elif orig_stripped.endswith('!'):
1169
+ # Check if generated seems exclamatory
1170
+ exclaim_words = ['amazing', 'incredible', 'fantastic', 'terrible', 'awful', 'wonderful', 'excellent']
1171
+ if any(word in generated.lower() for word in exclaim_words):
1172
+ generated += '!'
1173
+ else:
1174
+ generated += '.'
1175
  elif orig_stripped.endswith(':'):
1176
  generated += ':'
1177
  else:
1178
  generated += '.'
1179
 
1180
+ # Ensure first letter is capitalized ONLY if it's sentence start
1181
+ # Don't capitalize words like "iPhone" or "eBay" or placeholders
1182
+ if generated and generated[0].islower() and not self.is_likely_acronym_or_proper_noun(generated.split()[0]) and not generated.startswith('__KW'):
 
 
 
 
 
 
 
 
 
 
1183
  generated = generated[0].upper() + generated[1:]
1184
 
1185
  return generated