Spaces:

EdysorEdutech
/

human_final

Paused

App Files Community

EdysorEdutech committed on Jul 25

Commit

2f2dc26

verified ·

1 Parent(s): 4633815

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -100

app.py CHANGED Viewed

@@ -279,60 +279,48 @@ class HumanLikeVariations:
             # Natural contractions throughout
             sentence = self.apply_contractions(sentence)
-            # Add natural speech patterns (12% chance) - reduced and more selective
-            if random.random() < 0.12 and len(sentence.split()) > 12:
-                # Only add where it sounds natural
                 words = sentence.split()
-                # Find natural positions (after commas, before explanations)
-                natural_positions = []
-                for idx, word in enumerate(words):
-                    if idx > 3 and idx < len(words) - 3:
-                        if word.endswith(',') or words[idx-1].endswith(','):
-                            natural_positions.append(idx)
-                if natural_positions and random.random() < 0.5:
-                    pos = random.choice(natural_positions)
-                    if random.random() < 0.7:
-                        words.insert(pos, "you know,")
-                    else:
-                        words.insert(pos, "I mean,")
-                    sentence = ' '.join(words)
-            # Add READABLE human errors (5% chance) - reduced
-            if random.random() < 0.05 and len(sentence.split()) > 10:
-                error_applied = False
-                # Missing Oxford comma (most common and acceptable)
-                if not error_applied and ', and ' in sentence:
-                    if random.random() < 0.6:
-                        sentence = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', sentence, count=1)
-                        error_applied = True
-                # Very occasional its/it's confusion (but only where it makes sense)
-                if not error_applied and random.random() < 0.3:
-                    if " its " in sentence:
-                        # Check if followed by a verb (where it's would be correct)
-                        match = re.search(r'\bits\s+(\w+ing|\w+ed)\b', sentence)
-                        if match:
-                            sentence = sentence.replace(" its ", " it's ", 1)
-                            error_applied = True
-                # Occasional missing article (but subtle)
-                if not error_applied and random.random() < 0.2:
-                    # Only with "the" before certain nouns
-                    if " the same " in sentence:
-                        sentence = sentence.replace(" the same ", " same ", 1)
-                        error_applied = True
-            # Natural sentence combinations (15% chance) - reduced
-            if i < len(sentences) - 1 and random.random() < 0.15:
                 next_sent = sentences[i+1].strip()
                 if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
-                    # Only combine if semantically related
-                    if any(next_sent.lower().startswith(w) for w in ['this', 'that', 'it', 'which']):
-                        # Natural combination
-                        sentence = sentence.rstrip('.') + ", " + next_sent[0].lower() + next_sent[1:]
                         sentences[i+1] = ""  # Mark as processed
             result_sentences.append(sentence)
@@ -1133,43 +1121,39 @@ class EnhancedDipperHumanizer:
         generated = generated.strip()
-        # Fix fragments and incomplete thoughts
         words = generated.split()
-        # Check for sentence fragments (less than 3 words or no verb)
-        if len(words) < 3:
-            # Try to merge with previous context or expand
-            if len(words) == 2 and words[1].endswith('?'):
-                # Like "Some news?" - expand it
-                generated = "Here's some " + words[0].lower() + " " + words[1]
-            else:
-                # Too short, return original
-                return original
-        # Fix missing verbs or awkward constructions
-        # Check for patterns like "that incomparably less" (missing "are")
-        if ' that ' in generated:
-            that_index = generated.find(' that ')
-            after_that = generated[that_index+5:].split()
-            if len(after_that) > 0 and after_that[0] in ['incomparably', 'incredibly', 'remarkably', 'significantly']:
-                # Likely missing a verb
-                if len(after_that) > 1 and after_that[1] in ['less', 'more', 'better', 'worse']:
-                    # Insert "are"
-                    generated = generated[:that_index+5] + "are " + generated[that_index+5:]
-        # Fix awkward prepositional phrases
-        # "tuition fees USA for Indians" -> "tuition fees in the USA for Indians"
-        awkward_patterns = [
-            (r'\bfees\s+USA\b', 'fees in the USA'),
-            (r'\bfees\s+US\b', 'fees in the US'),
-            (r'\bstudies\s+USA\b', 'studies in the USA'),
-            (r'\bcost\s+USA\b', 'cost in the USA'),
-        ]
-        for pattern, replacement in awkward_patterns:
-            generated = re.sub(pattern, replacement, generated, flags=re.IGNORECASE)
-        # Ensure proper ending punctuation
         if generated and generated[-1] not in '.!?:,;':
             # Check original ending
             orig_stripped = original.strip()
@@ -1182,25 +1166,20 @@ class EnhancedDipperHumanizer:
                 else:
                     generated += '.'
             elif orig_stripped.endswith('!'):
-                generated += '!'
             elif orig_stripped.endswith(':'):
                 generated += ':'
             else:
                 generated += '.'
-        # Fix awkward colons in the middle of sentences
-        if ':' in generated and not generated.endswith(':'):
-            # Check if it's a list introduction (which is fine)
-            colon_index = generated.find(':')
-            after_colon = generated[colon_index+1:].strip()
-            # If what follows isn't a list or explanation, replace with semicolon or comma
-            if after_colon and not any(after_colon.startswith(w) for w in ['the', 'a', 'an', '1.', '•', '-']):
-                if 'they' in after_colon.lower()[:10]:
-                    # Like "have equal contact: they" -> "have equal contact; they"
-                    generated = generated.replace(':', ';', 1)
-        # Ensure first letter is capitalized
-        if generated and generated[0].islower() and not self.is_likely_acronym_or_proper_noun(generated.split()[0]):
             generated = generated[0].upper() + generated[1:]
         return generated

             # Natural contractions throughout
             sentence = self.apply_contractions(sentence)
+            # Add natural speech patterns (15% chance)
+            if random.random() < 0.15 and len(sentence.split()) > 10:
+                # Natural interruptions that humans actually use
+                if random.random() < 0.5:
+                    # Add "you know" or "I mean" naturally
+                    words = sentence.split()
+                    if len(words) > 6:
+                        pos = random.randint(3, len(words)-3)
+                        if random.random() < 0.5:
+                            words.insert(pos, "you know,")
+                        else:
+                            words.insert(pos, "I mean,")
+                        sentence = ' '.join(words)
+                else:
+                    # Start with natural opener
+                    openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
+                    sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
+            # Add subtle errors that humans make (8% chance)
+            if random.random() < 0.08:
                 words = sentence.split()
+                if len(words) > 5:
+                    # Common comma omissions
+                    if ", and" in sentence and random.random() < 0.3:
+                        sentence = sentence.replace(", and", " and", 1)
+                    # Double words occasionally
+                    elif random.random() < 0.2:
+                        idx = random.randint(1, len(words)-2)
+                        if words[idx].lower() in ['the', 'a', 'to', 'in', 'on', 'at']:
+                            words.insert(idx+1, words[idx])
+                            sentence = ' '.join(words)
+            # Natural sentence combinations (20% chance)
+            if i < len(sentences) - 1 and random.random() < 0.2:
                 next_sent = sentences[i+1].strip()
                 if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
+                    # Natural connectors based on content
+                    if any(w in next_sent.lower() for w in ['but', 'however', 'although']):
+                        sentence = sentence.rstrip('.') + ", but " + next_sent[0].lower() + next_sent[1:]
+                        sentences[i+1] = ""  # Mark as processed
+                    elif any(w in next_sent.lower() for w in ['also', 'too', 'as well']):
+                        sentence = sentence.rstrip('.') + " and " + next_sent[0].lower() + next_sent[1:]
                         sentences[i+1] = ""  # Mark as processed
             result_sentences.append(sentence)
         generated = generated.strip()
+        # Check if the sentence seems complete semantically
         words = generated.split()
+        if len(words) >= 3:
+            # Check if last word is a good ending word
+            last_word = words[-1].lower().rstrip('.,!?;:')
+            # Common ending words that might not need punctuation fix
+            ending_words = {
+                'too', 'also', 'well', 'though', 'however',
+                'furthermore', 'moreover', 'indeed', 'anyway',
+                'regardless', 'nonetheless', 'therefore', 'thus'
+            }
+            # If it ends with a good word, just add appropriate punctuation
+            if last_word in ending_words:
+                if generated[-1] not in '.!?':
+                    generated += '.'
+                return generated
+        # Check for cut-off patterns
+        if len(words) > 0:
+            last_word = words[-1]
+            # Remove if it's clearly cut off (1-2 chars, no vowels)
+            # But don't remove valid short words like "is", "of", "to", etc.
+            short_valid_words = {'is', 'of', 'to', 'in', 'on', 'at', 'by', 'or', 'if', 'so', 'up', 'no', 'we', 'he', 'me', 'be', 'do', 'go'}
+            if (len(last_word) <= 2 and
+                last_word.lower() not in short_valid_words and
+                not any(c in 'aeiouAEIOU' for c in last_word)):
+                words = words[:-1]
+                generated = ' '.join(words)
+        # Add ending punctuation based on context
         if generated and generated[-1] not in '.!?:,;':
             # Check original ending
             orig_stripped = original.strip()
                 else:
                     generated += '.'
             elif orig_stripped.endswith('!'):
+                # Check if generated seems exclamatory
+                exclaim_words = ['amazing', 'incredible', 'fantastic', 'terrible', 'awful', 'wonderful', 'excellent']
+                if any(word in generated.lower() for word in exclaim_words):
+                    generated += '!'
+                else:
+                    generated += '.'
             elif orig_stripped.endswith(':'):
                 generated += ':'
             else:
                 generated += '.'
+        # Ensure first letter is capitalized ONLY if it's sentence start
+        # Don't capitalize words like "iPhone" or "eBay" or placeholders
+        if generated and generated[0].islower() and not self.is_likely_acronym_or_proper_noun(generated.split()[0]) and not generated.startswith('__KW'):
             generated = generated[0].upper() + generated[1:]
         return generated