Spaces:

EdysorEdutech
/

human_final

Paused

App Files Files Community

EdysorEdutech committed on Jul 25

Commit

f4f74ad

verified ·

1 Parent(s): dfeccda

Update app.py

Browse files

Files changed (1) hide show

app.py +257 -160

app.py CHANGED Viewed

@@ -144,7 +144,7 @@ class HumanLikeVariations:
         ]
     def add_human_touch(self, text):
-        """Add subtle human-like imperfections - MORE AGGRESSIVE"""
         sentences = text.split('. ')
         modified_sentences = []
@@ -152,80 +152,115 @@ class HumanLikeVariations:
             if not sent.strip():
                 continue
-            # Occasionally start with casual transition (25% chance - increased)
-            if i > 0 and random.random() < 0.25 and len(sent.split()) > 5:
-                transition = random.choice(self.casual_transitions)
                 sent = transition + sent[0].lower() + sent[1:] if len(sent) > 1 else sent
-            # Add filler words occasionally (20% chance - increased)
-            if random.random() < 0.2 and len(sent.split()) > 8:
-                words = sent.split()
-                # Add multiple fillers sometimes
-                num_fillers = random.randint(1, 2)
-                for _ in range(num_fillers):
-                    if len(words) > 4:
-                        insert_pos = random.randint(2, len(words)-2)
-                        filler = random.choice(self.filler_phrases)
-                        words.insert(insert_pos, filler)
-                sent = ' '.join(words)
-            # Add varied sentence starters (15% chance)
-            if i > 0 and random.random() < 0.15 and len(sent.split()) > 10:
-                starter = random.choice(self.varied_starters)
                 sent = starter + " " + sent[0].lower() + sent[1:] if len(sent) > 1 else sent
-            # Occasionally use contractions (35% chance - increased)
             if random.random() < 0.35:
                 sent = self.apply_contractions(sent)
-            # Add occasional comma splices (10% chance) - common human error
-            if random.random() < 0.1 and ',' in sent and len(sent.split()) > 10:
-                # Replace a period with comma sometimes
                 parts = sent.split(', ')
-                if len(parts) > 2:
-                    join_idx = random.randint(1, len(parts)-1)
-                    parts[join_idx-1] = parts[join_idx-1] + ','
-                    sent = ' '.join(parts)
-            # NEW: Add parenthetical thoughts (8% chance)
-            if random.random() < 0.08 and len(sent.split()) > 15:
-                parentheticals = [
-                    "(and that's saying something)",
-                    "(which is pretty interesting)",
-                    "(trust me on this one)",
-                    "(I've seen this firsthand)",
-                    "(no joke)",
-                    "(seriously)",
-                    "(and for good reason)",
-                    "(believe it or not)",
-                    "(surprisingly enough)",
-                    "(which makes sense)",
-                    "(go figure)",
-                    "(who knew?)",
-                    "(makes you think)",
-                    "(worth considering)"
-                ]
-                words = sent.split()
-                insert_pos = random.randint(len(words)//3, 2*len(words)//3)
-                parenthetical = random.choice(parentheticals)
-                words.insert(insert_pos, parenthetical)
-                sent = ' '.join(words)
-            # NEW: Occasionally add rhetorical questions (5% chance)
-            if random.random() < 0.05 and i < len(sentences) - 1:
-                rhetorical_questions = [
-                    "Makes sense, right?",
-                    "Pretty cool, huh?",
-                    "Interesting, isn't it?",
-                    "Who would've thought?",
-                    "Sound familiar?",
-                    "See what I mean?",
-                    "Get the picture?",
-                    "Following along?",
-                    "Crazy, right?",
-                    "Wild, isn't it?"
-                ]
-                sent = sent + " " + random.choice(rhetorical_questions)
             modified_sentences.append(sent)
@@ -261,27 +296,41 @@ class HumanLikeVariations:
         return text
     def add_minor_errors(self, text):
-        """Add very minor, human-like errors - MORE REALISTIC"""
         # Occasionally miss Oxford comma (15% chance)
         if random.random() < 0.15:
-            text = re.sub(r'(\w+), (\w+), and', r'\1, \2 and', text)
         # Sometimes use 'which' instead of 'that' (8% chance)
         if random.random() < 0.08:
-            text = text.replace(' that ', ' which ', 1)
-        # NEW: Add very occasional typos (3% chance per sentence)
         sentences = text.split('. ')
         for i, sent in enumerate(sentences):
-            if random.random() < 0.03 and len(sent.split()) > 10:
                 words = sent.split()
                 # Pick a random word to potentially typo
-                word_idx = random.randint(0, len(words)-1)
                 word = words[word_idx].lower()
-                # Only typo common words
-                if word in self.common_typos and random.random() < 0.5:
-                    typo = random.choice(self.common_typos[word])
                     # Preserve original capitalization
                     if words[word_idx][0].isupper():
                         typo = typo[0].upper() + typo[1:]
@@ -290,28 +339,22 @@ class HumanLikeVariations:
         text = '. '.join(sentences)
-        # NEW: Occasionally double a word (2% chance)
-        if random.random() < 0.02:
-            words = text.split()
-            if len(words) > 20:
-                # Pick a small common word to double
-                small_words = ['the', 'a', 'an', 'is', 'was', 'are', 'were', 'to', 'of', 'in', 'on']
-                for idx, word in enumerate(words):
-                    if word.lower() in small_words and random.random() < 0.1:
-                        words[idx] = word + ' ' + word
-                        break
-                text = ' '.join(words)
-        # NEW: Mix up common homophones occasionally (3% chance)
-        if random.random() < 0.03:
-            homophones = [
-                ('their', 'there'), ('your', 'you\'re'), ('its', 'it\'s'),
-                ('then', 'than'), ('to', 'too'), ('effect', 'affect')
             ]
-            for pair in homophones:
-                if pair[0] in text and random.random() < 0.3:
-                    text = text.replace(pair[0], pair[1], 1)
-                    break
         return text
@@ -1161,7 +1204,7 @@ class EnhancedDipperHumanizer:
             return text
     def apply_sentence_variation(self, text):
-        """Apply natural sentence structure variations - MORE AGGRESSIVE"""
         sentences = self.split_into_sentences_advanced(text)
         varied_sentences = []
@@ -1170,89 +1213,143 @@ class EnhancedDipperHumanizer:
             if not sentence.strip():
                 continue
-            # MORE aggressive variations
-            # Combine short sentences more often (50% chance)
             if (i < len(sentences) - 1 and
-                len(sentence.split()) < 15 and
                 len(sentences[i+1].split()) < 15 and
                 random.random() < 0.5):
-                connectors = [', and', ', but', '; however,', '. Also,', '. Plus,', ', so', ', which means',
-                             ' - and', ' - but', '; meanwhile,', '. That said,', ', yet', ' - though']
-                connector = random.choice(connectors)
-                # Handle the next sentence properly
                 next_sent = sentences[i+1].strip()
                 if next_sent:
-                    combined = f"{sentence.rstrip('.')}{connector} {next_sent[0].lower()}{next_sent[1:]}"
-                    varied_sentences.append(combined)
-                    sentences[i+1] = ""  # Mark as processed
             elif sentence:  # Only process non-empty sentences
-                # Split very long sentences more aggressively
-                if len(sentence.split()) > 18 and ',' in sentence:
-                    parts = sentence.split(', ', 1)
-                    if len(parts) == 2 and len(parts[1].split()) > 6:
-                        # 70% chance to split
-                        if random.random() < 0.7:
-                            varied_sentences.append(parts[0] + '.')
-                            # Ensure second part starts with capital
-                            if parts[1]:
-                                varied_sentences.append(parts[1][0].upper() + parts[1][1:])
-                        else:
-                            varied_sentences.append(sentence)
                     else:
                         varied_sentences.append(sentence)
                 else:
-                    # Add natural variations more often (35% chance)
                     if i > 0 and random.random() < 0.35:
-                        # Sometimes add a transition
-                        transitions = ['Furthermore, ', 'Additionally, ', 'Moreover, ', 'Also, ',
-                                     'Besides, ', 'What\'s more, ', 'In addition, ', 'Not only that, ',
-                                     'To add to that, ', 'On top of that, ', 'Beyond that, ']
-                        transition = random.choice(transitions)
-                        if sentence[0].isupper():
-                            sentence = transition + sentence[0].lower() + sentence[1:]
-                    # Add mid-sentence interruptions (10% chance)
-                    if random.random() < 0.1 and len(sentence.split()) > 12:
-                        interruptions = [
-                            " - and this is crucial - ",
-                            " - believe me - ",
-                            " - no kidding - ",
-                            " (and yes, I mean it) ",
-                            " - stay with me here - ",
-                            " - and I'm not exaggerating - "
-                        ]
-                        words = sentence.split()
-                        pos = random.randint(len(words)//3, 2*len(words)//3)
-                        words.insert(pos, random.choice(interruptions))
-                        sentence = ' '.join(words)
                     varied_sentences.append(sentence)
         # Post-process for additional human patterns
         result = ' '.join([s for s in varied_sentences if s])
-        # Add occasional fragments for human touch (5% chance)
-        if random.random() < 0.05:
-            fragments = [
-                "Crazy, I know.",
-                "Wild stuff.",
-                "Makes you think.",
-                "Pretty interesting.",
-                "Go figure.",
-                "Who knew?",
-                "There you have it.",
-                "Food for thought.",
-                "Just saying.",
-                "Worth considering."
-            ]
             sentences = result.split('. ')
-            if len(sentences) > 3:
-                insert_pos = random.randint(1, len(sentences)-1)
-                sentences.insert(insert_pos, random.choice(fragments))
-                result = '. '.join(sentences)
         return result

         ]
     def add_human_touch(self, text):
+        """Add subtle human-like imperfections - MORE CONTEXT-AWARE"""
         sentences = text.split('. ')
         modified_sentences = []
             if not sent.strip():
                 continue
+            # Parse sentence structure for better filler placement
+            words = sent.split()
+            if not words:
+                continue
+            # Occasionally start with casual transition (25% chance)
+            if i > 0 and random.random() < 0.25 and len(words) > 5:
+                # Choose transitions based on sentence type
+                if any(q in sent.lower() for q in ['why', 'how', 'what', 'when', 'where']):
+                    # Question-appropriate transitions
+                    transition = random.choice(["So, ", "Well, ", "Now, ", "Okay, ", "Right, "])
+                elif any(w in sent.lower() for w in ['however', 'but', 'although', 'despite']):
+                    # Contrast-appropriate transitions
+                    transition = random.choice(["Still, ", "Yet, ", "Even so, ", "That said, ", "Nonetheless, "])
+                else:
+                    # General transitions
+                    transition = random.choice(self.casual_transitions[:20])  # Use more common ones
                 sent = transition + sent[0].lower() + sent[1:] if len(sent) > 1 else sent
+            # Add filler words occasionally (20% chance) - SMARTER PLACEMENT
+            if random.random() < 0.2 and len(words) > 8:
+                # Find good positions for fillers (after verbs, before adjectives, etc.)
+                good_positions = []
+                for idx, word in enumerate(words):
+                    if idx > 0 and idx < len(words) - 1:
+                        # After "is/are/was/were" (good for "really", "actually", etc.)
+                        if word.lower() in ['is', 'are', 'was', 'were', 'been', 'be']:
+                            good_positions.append(idx + 1)
+                        # Before adjectives (good for "quite", "rather", etc.)
+                        elif words[idx-1].lower() in ['a', 'an', 'the', 'very', 'so']:
+                            good_positions.append(idx)
+                        # After "can/could/will/would" (good for "probably", "definitely", etc.)
+                        elif word.lower() in ['can', 'could', 'will', 'would', 'should', 'might', 'may']:
+                            good_positions.append(idx + 1)
+                if good_positions:
+                    insert_pos = random.choice(good_positions)
+                    # Choose appropriate filler based on context
+                    if words[insert_pos-1].lower() in ['is', 'are', 'was', 'were']:
+                        filler = random.choice(['really', 'actually', 'definitely', 'certainly', 'quite'])
+                    elif words[insert_pos-1].lower() in ['can', 'could', 'will', 'would']:
+                        filler = random.choice(['probably', 'definitely', 'certainly', 'likely', 'possibly'])
+                    else:
+                        filler = random.choice(['quite', 'rather', 'pretty', 'fairly', 'somewhat'])
+                    words.insert(insert_pos, filler)
+                    sent = ' '.join(words)
+            # Add varied sentence starters (15% chance) - MORE LOGICAL
+            if i > 0 and random.random() < 0.15 and len(words) > 10:
+                # Choose starters based on sentence content
+                if any(w in sent.lower() for w in ['research', 'study', 'data', 'evidence']):
+                    starter = random.choice(["Research shows", "Studies indicate", "Evidence suggests", "Data reveals"])
+                elif any(w in sent.lower() for w in ['important', 'crucial', 'vital', 'essential']):
+                    starter = random.choice(["It's worth noting that", "Keep in mind", "Bear in mind that", "The key here is"])
+                else:
+                    starter = random.choice(["When it comes to", "As for", "Regarding", "In terms of"])
                 sent = starter + " " + sent[0].lower() + sent[1:] if len(sent) > 1 else sent
+            # Occasionally use contractions (35% chance)
             if random.random() < 0.35:
                 sent = self.apply_contractions(sent)
+            # Add occasional comma splices (10% chance) - ONLY WHERE IT MAKES SENSE
+            if random.random() < 0.1 and ',' in sent and len(words) > 10:
+                # Only do this with independent clauses
                 parts = sent.split(', ')
+                if len(parts) == 2:
+                    # Check if both parts could be sentences
+                    if (len(parts[0].split()) > 4 and len(parts[1].split()) > 4 and
+                        any(v in parts[1].lower().split()[:3] for v in ['it', 'this', 'that', 'they', 'we', 'i', 'you'])):
+                        sent = parts[0] + ', ' + parts[1]  # Keep the comma splice
+            # NEW: Add parenthetical thoughts (8% chance) - CONTEXT-AWARE
+            if random.random() < 0.08 and len(words) > 15:
+                # Find natural break points (after complete thoughts)
+                break_points = []
+                for idx, word in enumerate(words):
+                    if idx > len(words)//3 and idx < 2*len(words)//3:
+                        if word.endswith(',') or words[idx-1].lower() in ['is', 'are', 'was', 'were']:
+                            break_points.append(idx)
+                if break_points:
+                    insert_pos = random.choice(break_points)
+                    # Choose relevant parenthetical
+                    if any(w in sent.lower() for w in ['surprising', 'interesting', 'amazing']):
+                        parenthetical = random.choice(["(and that's saying something)", "(believe it or not)", "(surprisingly enough)"])
+                    elif any(w in sent.lower() for w in ['obvious', 'clear', 'evident']):
+                        parenthetical = random.choice(["(obviously)", "(clearly)", "(of course)"])
+                    else:
+                        parenthetical = random.choice(["(which makes sense)", "(for good reason)", "(as you'd expect)"])
+                    words.insert(insert_pos, parenthetical)
+                    sent = ' '.join(words)
+            # NEW: Occasionally add rhetorical questions (5% chance) - ONLY AT PARAGRAPH ENDS
+            if random.random() < 0.05 and i == len(sentences) - 1:
+                # Choose question based on sentence content
+                if any(w in sent.lower() for w in ['amazing', 'incredible', 'fantastic']):
+                    question = random.choice(["Pretty cool, right?", "Amazing, isn't it?", "Impressive, huh?"])
+                elif any(w in sent.lower() for w in ['important', 'crucial', 'essential']):
+                    question = random.choice(["Makes sense, right?", "See what I mean?", "Important to remember, yeah?"])
+                else:
+                    question = random.choice(["Interesting, right?", "Makes you think, doesn't it?", "Sound familiar?"])
+                sent = sent + " " + question
             modified_sentences.append(sent)
         return text
     def add_minor_errors(self, text):
+        """Add very minor, human-like errors - MORE REALISTIC BUT CONTROLLED"""
         # Occasionally miss Oxford comma (15% chance)
         if random.random() < 0.15:
+            # Only in lists, not random commas
+            text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
         # Sometimes use 'which' instead of 'that' (8% chance)
         if random.random() < 0.08:
+            # Only for non-restrictive clauses
+            matches = re.finditer(r'\b(\w+) that (\w+)', text)
+            for match in list(matches)[:1]:  # Only first occurrence
+                if match.group(1).lower() not in ['believe', 'think', 'know', 'say']:
+                    text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
+        # NEW: Add very occasional typos (2% chance per sentence) - REDUCED AND CONTROLLED
         sentences = text.split('. ')
         for i, sent in enumerate(sentences):
+            if random.random() < 0.02 and len(sent.split()) > 15:  # Only in longer sentences
                 words = sent.split()
                 # Pick a random word to potentially typo
+                word_idx = random.randint(len(words)//2, len(words)-2)  # Avoid start/end
                 word = words[word_idx].lower()
+                # Only typo common words where typo won't break meaning
+                safe_typos = {
+                    'the': 'teh',
+                    'and': 'adn',
+                    'that': 'taht',
+                    'with': 'wtih',
+                    'from': 'form',
+                    'because': 'becuase'
+                }
+                if word in safe_typos and random.random() < 0.5:
+                    typo = safe_typos[word]
                     # Preserve original capitalization
                     if words[word_idx][0].isupper():
                         typo = typo[0].upper() + typo[1:]
         text = '. '.join(sentences)
+        # Skip double words - too distracting
+        # Mix up common homophones occasionally (2% chance) - ONLY SAFE ONES
+        if random.random() < 0.02:
+            safe_homophones = [
+                ('its', "it's"),  # Very common mistake
+                ('your', "you're"),  # Another common one
             ]
+            for pair in safe_homophones:
+                # Check context to avoid breaking meaning
+                if f" {pair[0]} " in text and random.random() < 0.3:
+                    # Find one instance and check it's safe to replace
+                    pattern = rf'\b{pair[0]}\s+(\w+ing|\w+ed)\b'  # its + verb = likely should be it's
+                    if re.search(pattern, text):
+                        text = re.sub(pattern, f"{pair[1]} \\1", text, count=1)
+                        break
         return text
             return text
     def apply_sentence_variation(self, text):
+        """Apply natural sentence structure variations - MORE INTELLIGENT"""
         sentences = self.split_into_sentences_advanced(text)
         varied_sentences = []
             if not sentence.strip():
                 continue
+            words = sentence.split()
+            # Combine short sentences more often (50% chance) - BUT SMARTLY
             if (i < len(sentences) - 1 and
+                len(words) < 15 and
                 len(sentences[i+1].split()) < 15 and
                 random.random() < 0.5):
                 next_sent = sentences[i+1].strip()
                 if next_sent:
+                    # Check if sentences are related (share common words or themes)
+                    current_words = set(w.lower() for w in words if len(w) > 3)
+                    next_words = set(w.lower() for w in next_sent.split() if len(w) > 3)
+                    # Only combine if they share context or one follows from the other
+                    if current_words & next_words or any(w in next_sent.lower() for w in ['this', 'that', 'these', 'those', 'it']):
+                        # Choose appropriate connector based on relationship
+                        if any(w in next_sent.lower().split()[:3] for w in ['however', 'but', 'yet', 'although']):
+                            connector = random.choice([', but', '; however,', ', yet', ' - though'])
+                        elif any(w in next_sent.lower().split()[:3] for w in ['therefore', 'thus', 'so', 'hence']):
+                            connector = random.choice([', so', '. Therefore,', ', which means', ' - thus'])
+                        elif any(w in next_sent.lower().split()[:3] for w in ['also', 'additionally', 'furthermore']):
+                            connector = random.choice([', and', '. Also,', '. Plus,', ' - additionally,'])
+                        else:
+                            connector = random.choice([', and', ', which', ' - '])
+                        combined = f"{sentence.rstrip('.')}{connector} {next_sent[0].lower()}{next_sent[1:]}"
+                        varied_sentences.append(combined)
+                        sentences[i+1] = ""  # Mark as processed
+                    else:
+                        varied_sentences.append(sentence)
+                else:
+                    varied_sentences.append(sentence)
             elif sentence:  # Only process non-empty sentences
+                # Split very long sentences more intelligently
+                if len(words) > 18:
+                    # Look for natural break points
+                    break_words = ['however', 'therefore', 'moreover', 'furthermore', 'additionally', 'consequently']
+                    conjunctions = [', and', ', but', ', so', ', yet', ', for', ', or', ', nor']
+                    # Find the best break point
+                    best_break = -1
+                    for idx, word in enumerate(words):
+                        if word.lower().rstrip(',') in break_words and idx > len(words)//3:
+                            best_break = idx
+                            break
+                    # If no break word found, look for conjunctions
+                    if best_break == -1:
+                        text_lower = sentence.lower()
+                        for conj in conjunctions:
+                            if conj in text_lower:
+                                # Find position in word list
+                                conj_pos = text_lower.find(conj)
+                                word_count = len(text_lower[:conj_pos].split())
+                                if len(words)//3 < word_count < 2*len(words)//3:
+                                    best_break = word_count
+                                    break
+                    # Split if good break point found
+                    if best_break > 0 and random.random() < 0.7:
+                        part1 = ' '.join(words[:best_break])
+                        part2 = ' '.join(words[best_break:])
+                        # Clean up punctuation
+                        part1 = part1.rstrip(',') + '.'
+                        # Capitalize second part appropriately
+                        if part2 and part2[0].islower() and not part2.startswith(('however', 'therefore', 'moreover')):
+                            part2 = part2[0].upper() + part2[1:]
+                        varied_sentences.append(part1)
+                        varied_sentences.append(part2)
                     else:
                         varied_sentences.append(sentence)
                 else:
+                    # Add natural variations more often (35% chance) - BUT CONTEXTUALLY
                     if i > 0 and random.random() < 0.35:
+                        # Check previous sentence ending to choose appropriate transition
+                        if varied_sentences and len(varied_sentences) > 0:
+                            prev_sent = varied_sentences[-1]
+                            # Choose transition based on relationship
+                            if any(w in sentence.lower() for w in ['however', 'but', 'although', 'despite']):
+                                transition = random.choice(['However, ', 'On the other hand, ', 'That said, ', 'Nevertheless, '])
+                            elif any(w in sentence.lower() for w in ['example', 'instance', 'such as', 'like']):
+                                transition = random.choice(['For instance, ', 'For example, ', 'To illustrate, ', 'Consider this: '])
+                            elif any(w in prev_sent.lower() for w in ['first', 'second', 'finally', 'lastly']):
+                                transition = random.choice(['Next, ', 'Additionally, ', 'Furthermore, ', 'Also, '])
+                            else:
+                                transition = random.choice(['Furthermore, ', 'Additionally, ', 'Moreover, ', 'Also, '])
+                            if sentence[0].isupper():
+                                sentence = transition + sentence[0].lower() + sentence[1:]
+                    # Add mid-sentence interruptions (10% chance) - ONLY WHERE NATURAL
+                    if random.random() < 0.1 and len(words) > 12:
+                        # Find natural pause points (after commas, before "which", etc.)
+                        pause_points = []
+                        for idx, word in enumerate(words):
+                            if word.endswith(',') and idx > len(words)//4 and idx < 3*len(words)//4:
+                                pause_points.append(idx + 1)
+                            elif word.lower() in ['which', 'that', 'who', 'where'] and idx > len(words)//3:
+                                pause_points.append(idx)
+                        if pause_points:
+                            pos = random.choice(pause_points)
+                            interruption = random.choice([
+                                " - and this is important - ",
+                                " - mind you - ",
+                                " - interestingly - ",
+                                " (worth noting) ",
+                                " - by the way - "
+                            ])
+                            words.insert(pos, interruption)
+                            sentence = ' '.join(words)
                     varied_sentences.append(sentence)
         # Post-process for additional human patterns
         result = ' '.join([s for s in varied_sentences if s])
+        # Add occasional fragments for human touch (5% chance) - ONLY AT APPROPRIATE PLACES
+        if random.random() < 0.05 and len(varied_sentences) > 3:
             sentences = result.split('. ')
+            # Add fragment after sentences that set up for it
+            for idx, sent in enumerate(sentences[:-1]):
+                if any(w in sent.lower() for w in ['amazing', 'incredible', 'surprising', 'interesting']):
+                    fragments = ["Truly remarkable.", "Quite something.", "Really makes you think."]
+                    sentences.insert(idx + 1, random.choice(fragments))
+                    break
+                elif any(w in sent.lower() for w in ['difficult', 'challenging', 'complex', 'complicated']):
+                    fragments = ["Not easy, for sure.", "Tough stuff.", "Challenging indeed."]
+                    sentences.insert(idx + 1, random.choice(fragments))
+                    break
+            result = '. '.join(sentences)
         return result