Spaces:

EdysorEdutech
/

human_final

Paused

App Files Files Community

EdysorEdutech committed on Jul 25, 2025

Commit

4633815

verified ·

1 Parent(s): 94251cc

Update app.py

Browse files

Files changed (1) hide show

app.py +100 -79

app.py CHANGED Viewed

@@ -279,48 +279,60 @@ class HumanLikeVariations:
             # Natural contractions throughout
             sentence = self.apply_contractions(sentence)
-            # Add natural speech patterns (15% chance)
-            if random.random() < 0.15 and len(sentence.split()) > 10:
-                # Natural interruptions that humans actually use
-                if random.random() < 0.5:
-                    # Add "you know" or "I mean" naturally
-                    words = sentence.split()
-                    if len(words) > 6:
-                        pos = random.randint(3, len(words)-3)
-                        if random.random() < 0.5:
-                            words.insert(pos, "you know,")
-                        else:
-                            words.insert(pos, "I mean,")
-                        sentence = ' '.join(words)
-                else:
-                    # Start with natural opener
-                    openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
-                    sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
-            # Add subtle errors that humans make (8% chance)
-            if random.random() < 0.08:
                 words = sentence.split()
-                if len(words) > 5:
-                    # Common comma omissions
-                    if ", and" in sentence and random.random() < 0.3:
-                        sentence = sentence.replace(", and", " and", 1)
-                    # Double words occasionally
-                    elif random.random() < 0.2:
-                        idx = random.randint(1, len(words)-2)
-                        if words[idx].lower() in ['the', 'a', 'to', 'in', 'on', 'at']:
-                            words.insert(idx+1, words[idx])
-                            sentence = ' '.join(words)
-            # Natural sentence combinations (20% chance)
-            if i < len(sentences) - 1 and random.random() < 0.2:
                 next_sent = sentences[i+1].strip()
                 if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
-                    # Natural connectors based on content
-                    if any(w in next_sent.lower() for w in ['but', 'however', 'although']):
-                        sentence = sentence.rstrip('.') + ", but " + next_sent[0].lower() + next_sent[1:]
-                        sentences[i+1] = ""  # Mark as processed
-                    elif any(w in next_sent.lower() for w in ['also', 'too', 'as well']):
-                        sentence = sentence.rstrip('.') + " and " + next_sent[0].lower() + next_sent[1:]
                         sentences[i+1] = ""  # Mark as processed
             result_sentences.append(sentence)
@@ -1121,39 +1133,43 @@ class EnhancedDipperHumanizer:
         generated = generated.strip()
-        # Check if the sentence seems complete semantically
         words = generated.split()
-        if len(words) >= 3:
-            # Check if last word is a good ending word
-            last_word = words[-1].lower().rstrip('.,!?;:')
-            # Common ending words that might not need punctuation fix
-            ending_words = {
-                'too', 'also', 'well', 'though', 'however',
-                'furthermore', 'moreover', 'indeed', 'anyway',
-                'regardless', 'nonetheless', 'therefore', 'thus'
-            }
-            # If it ends with a good word, just add appropriate punctuation
-            if last_word in ending_words:
-                if generated[-1] not in '.!?':
-                    generated += '.'
-                return generated
-        # Check for cut-off patterns
-        if len(words) > 0:
-            last_word = words[-1]
-            # Remove if it's clearly cut off (1-2 chars, no vowels)
-            # But don't remove valid short words like "is", "of", "to", etc.
-            short_valid_words = {'is', 'of', 'to', 'in', 'on', 'at', 'by', 'or', 'if', 'so', 'up', 'no', 'we', 'he', 'me', 'be', 'do', 'go'}
-            if (len(last_word) <= 2 and
-                last_word.lower() not in short_valid_words and
-                not any(c in 'aeiouAEIOU' for c in last_word)):
-                words = words[:-1]
-                generated = ' '.join(words)
-        # Add ending punctuation based on context
         if generated and generated[-1] not in '.!?:,;':
             # Check original ending
             orig_stripped = original.strip()
@@ -1166,20 +1182,25 @@ class EnhancedDipperHumanizer:
                 else:
                     generated += '.'
             elif orig_stripped.endswith('!'):
-                # Check if generated seems exclamatory
-                exclaim_words = ['amazing', 'incredible', 'fantastic', 'terrible', 'awful', 'wonderful', 'excellent']
-                if any(word in generated.lower() for word in exclaim_words):
-                    generated += '!'
-                else:
-                    generated += '.'
             elif orig_stripped.endswith(':'):
                 generated += ':'
             else:
                 generated += '.'
-        # Ensure first letter is capitalized ONLY if it's sentence start
-        # Don't capitalize words like "iPhone" or "eBay" or placeholders
-        if generated and generated[0].islower() and not self.is_likely_acronym_or_proper_noun(generated.split()[0]) and not generated.startswith('__KW'):
             generated = generated[0].upper() + generated[1:]
         return generated

             # Natural contractions throughout
             sentence = self.apply_contractions(sentence)
+            # Add natural speech patterns (12% chance) - reduced and more selective
+            if random.random() < 0.12 and len(sentence.split()) > 12:
+                # Only add where it sounds natural
                 words = sentence.split()
+                # Find natural positions (after commas, before explanations)
+                natural_positions = []
+                for idx, word in enumerate(words):
+                    if idx > 3 and idx < len(words) - 3:
+                        if word.endswith(',') or words[idx-1].endswith(','):
+                            natural_positions.append(idx)
+                if natural_positions and random.random() < 0.5:
+                    pos = random.choice(natural_positions)
+                    if random.random() < 0.7:
+                        words.insert(pos, "you know,")
+                    else:
+                        words.insert(pos, "I mean,")
+                    sentence = ' '.join(words)
+            # Add READABLE human errors (5% chance) - reduced
+            if random.random() < 0.05 and len(sentence.split()) > 10:
+                error_applied = False
+                # Missing Oxford comma (most common and acceptable)
+                if not error_applied and ', and ' in sentence:
+                    if random.random() < 0.6:
+                        sentence = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', sentence, count=1)
+                        error_applied = True
+                # Very occasional its/it's confusion (but only where it makes sense)
+                if not error_applied and random.random() < 0.3:
+                    if " its " in sentence:
+                        # Check if followed by a verb (where it's would be correct)
+                        match = re.search(r'\bits\s+(\w+ing|\w+ed)\b', sentence)
+                        if match:
+                            sentence = sentence.replace(" its ", " it's ", 1)
+                            error_applied = True
+                # Occasional missing article (but subtle)
+                if not error_applied and random.random() < 0.2:
+                    # Only with "the" before certain nouns
+                    if " the same " in sentence:
+                        sentence = sentence.replace(" the same ", " same ", 1)
+                        error_applied = True
+            # Natural sentence combinations (15% chance) - reduced
+            if i < len(sentences) - 1 and random.random() < 0.15:
                 next_sent = sentences[i+1].strip()
                 if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
+                    # Only combine if semantically related
+                    if any(next_sent.lower().startswith(w) for w in ['this', 'that', 'it', 'which']):
+                        # Natural combination
+                        sentence = sentence.rstrip('.') + ", " + next_sent[0].lower() + next_sent[1:]
                         sentences[i+1] = ""  # Mark as processed
             result_sentences.append(sentence)
         generated = generated.strip()
+        # Fix fragments and incomplete thoughts
         words = generated.split()
+        # Check for sentence fragments (fewer than 3 words; no verb check is performed here)
+        if len(words) < 3:
+            # Try to merge with previous context or expand
+            if len(words) == 2 and words[1].endswith('?'):
+                # Like "Some news?" - expand it
+                generated = "Here's some " + words[0].lower() + " " + words[1]
+            else:
+                # Too short, return original
+                return original
+        # Fix missing verbs or awkward constructions
+        # Check for patterns like "that incomparably less" (missing "are")
+        if ' that ' in generated:
+            that_index = generated.find(' that ')
+            after_that = generated[that_index+5:].split()
+            if len(after_that) > 0 and after_that[0] in ['incomparably', 'incredibly', 'remarkably', 'significantly']:
+                # Likely missing a verb
+                if len(after_that) > 1 and after_that[1] in ['less', 'more', 'better', 'worse']:
+                    # Insert "are"
+                    generated = generated[:that_index+5] + "are " + generated[that_index+5:]
+        # Fix awkward prepositional phrases
+        # "tuition fees USA for Indians" -> "tuition fees in the USA for Indians"
+        awkward_patterns = [
+            (r'\bfees\s+USA\b', 'fees in the USA'),
+            (r'\bfees\s+US\b', 'fees in the US'),
+            (r'\bstudies\s+USA\b', 'studies in the USA'),
+            (r'\bcost\s+USA\b', 'cost in the USA'),
+        ]
+        for pattern, replacement in awkward_patterns:
+            generated = re.sub(pattern, replacement, generated, flags=re.IGNORECASE)
+        # Ensure proper ending punctuation
         if generated and generated[-1] not in '.!?:,;':
             # Check original ending
             orig_stripped = original.strip()
                 else:
                     generated += '.'
             elif orig_stripped.endswith('!'):
+                generated += '!'
             elif orig_stripped.endswith(':'):
                 generated += ':'
             else:
                 generated += '.'
+        # Fix awkward colons in the middle of sentences
+        if ':' in generated and not generated.endswith(':'):
+            # Check if it's a list introduction (which is fine)
+            colon_index = generated.find(':')
+            after_colon = generated[colon_index+1:].strip()
+            # If what follows isn't a list or explanation and "they" appears near its start, replace the colon with a semicolon
+            if after_colon and not any(after_colon.startswith(w) for w in ['the', 'a', 'an', '1.', '•', '-']):
+                if 'they' in after_colon.lower()[:10]:
+                    # Like "have equal contact: they" -> "have equal contact; they"
+                    generated = generated.replace(':', ';', 1)
+        # Ensure first letter is capitalized
+        if generated and generated[0].islower() and not self.is_likely_acronym_or_proper_noun(generated.split()[0]):
             generated = generated[0].upper() + generated[1:]
         return generated