Spaces:

EdysorEdutech
/

human_final

Paused

App Files Community

EdysorEdutech committed on Aug 1

Commit

6b6a48c

verified ·

1 Parent(s): 270759b

Update app.py

Browse files

Files changed (1) hide show

app.py +378 -144

app.py CHANGED Viewed

@@ -674,7 +674,7 @@ class EnhancedDipperHumanizer:
         return modified_text, keyword_map
     def restore_keywords_robust(self, text, keyword_map):
-        """Restore keywords with more flexible pattern matching - FIXED VERSION"""
         if not keyword_map:
             return text
@@ -687,134 +687,362 @@ class EnhancedDipperHumanizer:
         # Track which positions have been replaced to avoid double replacement
         replaced_positions = set()
-        # First, create a reverse map for easier debugging
-        reverse_map = {}
         for placeholder, keyword in keyword_map.items():
-            # Extract number from placeholder
-            match = re.search(r'__KW(\d+)__', placeholder)
-            if match:
-                num = match.group(1)
-                reverse_map[num] = keyword
-                reverse_map[f'KW{num}'] = keyword
-                reverse_map[f'kw{num}'] = keyword
-        # CRITICAL FIX: Replace ALL variations of KW patterns
-        # Start with the most specific patterns first
-        all_patterns = []
         for placeholder, keyword in keyword_map.items():
             match = re.search(r'__KW(\d+)__', placeholder)
             if match:
                 num = match.group(1)
-                # Add all possible variations with this number
-                patterns_to_add = [
-                    # Complete patterns first
                     (f'__KW{num}__', keyword),
-                    (f'__KW{num}', keyword),
-                    (f'KW{num}__', keyword),
-                    (f'__kw{num}__', keyword),
                     (f'_KW{num}_', keyword),
-                    (f'_KW{num}', keyword),
-                    (f'KW{num}_', keyword),
-                    # Bare patterns
                     (f'KW{num}', keyword),
                     (f'kw{num}', keyword),
                     (f'Kw{num}', keyword),
-                    (f'KW{num.lstrip("0")}', keyword),  # Remove leading zeros
-                    # With punctuation
-                    (f'KW{num}.', keyword),
-                    (f'KW{num},', keyword),
-                    (f'KW{num}:', keyword),
-                    (f'KW{num};', keyword),
-                    (f'KW{num}!', keyword),
-                    (f'KW{num}?', keyword),
-                    (f'KW{num})', keyword),
-                    (f'(KW{num}', keyword),
-                    # Common corruptions
-                    (f'KW-{num}', keyword),
-                    (f'KW_{num}', keyword),
-                    (f'KW {num}', keyword),
-                    (f'K W{num}', keyword),
-                    (f'KVV{num}', keyword),
-                    (f'KKW{num}', keyword),
-                    (f'WK{num}', keyword),
-                    # Multiple underscores
                     (f'___KW{num}___', keyword),
                     (f'____KW{num}____', keyword),
                     (f'__KW{num}___', keyword),
                     (f'___KW{num}__', keyword),
                 ]
-                all_patterns.extend(patterns_to_add)
-        # Sort patterns by length (longest first) to avoid partial replacements
-        all_patterns.sort(key=lambda x: len(x[0]), reverse=True)
-        # Apply all patterns
-        for pattern, keyword in all_patterns:
-            if pattern in restored_text:
-                print(f"Found pattern '{pattern}', replacing with '{keyword}'")
-                restored_text = restored_text.replace(pattern, keyword)
-        # Special handling for isolated KW patterns
-        # This catches cases like "KW-KW", "KW-s", etc.
-        kw_pattern = r'\bKW(?:\d*)\b'
-        matches = list(re.finditer(kw_pattern, restored_text))
-        if matches and keyword_map:
-            # Use the first keyword as a fallback for unmatched KW patterns
-            fallback_keyword = list(keyword_map.values())[0]
-            for match in reversed(matches):  # Process from end to maintain positions
-                kw_text = match.group(0)
-                if kw_text not in ['KW' + k.split('KW')[1].split('__')[0] for k in keyword_map.keys()]:
-                    # This is an orphaned KW pattern
-                    start, end = match.span()
-                    print(f"Replacing orphaned '{kw_text}' with '{fallback_keyword}'")
-                    restored_text = restored_text[:start] + fallback_keyword + restored_text[end:]
-        # Final cleanup: Remove any remaining multiple underscores
-        restored_text = re.sub(r'_{2,}', ' ', restored_text)
-        # Remove any remaining KW patterns that weren't caught
-        # This is a last resort to clean up any stragglers
-        restored_text = re.sub(r'\bKW\d*\b', '', restored_text)
-        # Clean up any double spaces created
-        restored_text = re.sub(r'\s+', ' ', restored_text)
         # Final verification
-        remaining_kw = re.findall(r'KW\d+', restored_text)
-        if remaining_kw:
-            print(f"WARNING: Remaining KW patterns found: {remaining_kw}")
-        return restored_text.strip()
-    def final_kw_cleanup(self, text, keywords):
-        """Final cleanup to remove any remaining KW patterns"""
-        if not keywords:
-            return text
-        # Find all remaining KW patterns
-        kw_patterns = re.findall(r'\bKW\d*\b', text)
-        if kw_patterns:
-            print(f"Final cleanup: Found {len(kw_patterns)} remaining KW patterns")
-            # Replace each KW pattern with an appropriate keyword
-            for i, pattern in enumerate(kw_patterns):
-                # Use keywords cyclically if there are more patterns than keywords
-                keyword_index = i % len(keywords)
-                replacement = keywords[keyword_index]
-                # Replace the pattern
-                text = re.sub(r'\b' + re.escape(pattern) + r'\b', replacement, text, count=1)
-        return text
     def should_skip_element(self, element, text):
         """Determine if an element should be skipped from paraphrasing"""
@@ -1561,13 +1789,14 @@ class EnhancedDipperHumanizer:
         return html_text
     def wrap_keywords_in_paragraphs(self, soup, keywords):
-        """Wrap keywords with <strong> tags inside <p> tags only - FIXED VERSION"""
         if not keywords:
             return
         # Find all paragraph tags
         for p_tag in soup.find_all('p'):
             # Skip paragraphs that are inside special elements
             skip_parents = ['div.author-intro', 'div.cta-box', 'div.testimonial-card',
                           'div.news-box', 'button', 'a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                           'div.quiz-container', 'div.question-container', 'div.results']
@@ -1575,6 +1804,7 @@ class EnhancedDipperHumanizer:
             # Check if this paragraph should be skipped
             should_skip = False
             for parent in p_tag.parents:
                 if parent.name == 'div' and parent.get('class'):
                     classes = parent.get('class', [])
                     if isinstance(classes, list):
@@ -1589,6 +1819,7 @@ class EnhancedDipperHumanizer:
                         should_skip = True
                         break
                 if parent.name in ['button', 'a', 'blockquote', 'details', 'summary']:
                     should_skip = True
                     break
@@ -1606,50 +1837,51 @@ class EnhancedDipperHumanizer:
             if any(skip_class in p_class_str for skip_class in ['testimonial-card', 'quiz-', 'stat-']):
                 continue
-            # NEW APPROACH: Process the entire paragraph's HTML at once
-            try:
-                # Get the paragraph's inner HTML as a string
-                p_html = str(p_tag.decode_contents())
-                # Track if we made any changes
-                modified = False
-                # Process each keyword
                 for keyword in keywords:
-                    # Create pattern that won't match if already in a tag
-                    # This regex ensures we don't wrap keywords that are already inside HTML tags
-                    pattern = r'(?<!<[^>]*)(?<!>)\b(' + re.escape(keyword) + r')\b(?![^<]*>)'
-                    # Count matches before replacement
-                    matches_before = len(re.findall(pattern, p_html, flags=re.IGNORECASE))
-                    if matches_before > 0:
-                        # Replace with strong tags, preserving original case
-                        p_html = re.sub(
-                            pattern,
-                            r'<strong>\1</strong>',
-                            p_html,
-                            flags=re.IGNORECASE
-                        )
-                        modified = True
-                # If we modified the HTML, update the paragraph
-                if modified:
-                    # Clear the paragraph
-                    p_tag.clear()
-                    # Parse the modified HTML and add it back
-                    # Use 'html.parser' to avoid encoding issues
-                    modified_soup = BeautifulSoup(p_html, 'html.parser')
-                    # Add all the parsed content back to the paragraph
-                    for element in modified_soup:
-                        p_tag.append(element)
-            except Exception as e:
-                print(f"Error processing paragraph for keywords: {str(e)}")
-                # If there's an error, skip this paragraph
-                continue
     def add_natural_flow_variations(self, text):
         """Add more natural flow and rhythm variations for Originality AI"""
@@ -1853,10 +2085,12 @@ class EnhancedDipperHumanizer:
             result = self.post_process_html(result)
             # Final safety check for any remaining placeholders or underscores
-            if '__KW' in result or re.search(r'_{3,}', result) or re.search(r'\bKW\d*\b', result):
-                print("Warning: Found placeholders or KW patterns in final HTML output")
                 # Attempt to clean them with keywords
-                result = self.final_kw_cleanup(result, all_keywords)
             # Restore all script tags
             for idx, script_content in enumerate(preserved_scripts):

         return modified_text, keyword_map
     def restore_keywords_robust(self, text, keyword_map):
+        """Restore keywords with more flexible pattern matching - ENHANCED VERSION"""
         if not keyword_map:
             return text
         # Track which positions have been replaced to avoid double replacement
         replaced_positions = set()
+        # First pass: Direct placeholder replacement
         for placeholder, keyword in keyword_map.items():
+            if placeholder in restored_text:
+                print(f"Found exact placeholder {placeholder}, replacing with {keyword}")
+                restored_text = restored_text.replace(placeholder, keyword)
+                # Mark positions as replaced
+                for match in re.finditer(re.escape(keyword), restored_text):
+                    replaced_positions.update(range(match.start(), match.end()))
+        # Second pass: Handle any mangled placeholders with EXPANDED patterns
         for placeholder, keyword in keyword_map.items():
+            # Extract the number from placeholder
             match = re.search(r'__KW(\d+)__', placeholder)
             if match:
                 num = match.group(1)
+                # EXPANDED patterns the model might create
+                patterns = [
+                    # Original patterns
                     (f'__KW{num}__', keyword),
+                    (f'__ KW{num}__', keyword),
+                    (f'__KW {num}__', keyword),
+                    (f'__ KW {num} __', keyword),
                     (f'_KW{num}_', keyword),
+                    (f'_kw{num}_', keyword),
+                    (f'KW{num}', keyword),
+                    (f'KW {num}', keyword),
+                    (f'__kw{num}__', keyword),
+                    (f'__Kw{num}__', keyword),
+                    (f'__ kw{num}__', keyword),
+                    (f'__KW{num}_', keyword),
+                    (f'_KW{num}__', keyword),
+                    (f'kw{num}', keyword),
+                    (f'``KW{num}__', keyword),
+                    (f'``KKW{num}', keyword),
+                    # NEW patterns for common corruptions
                     (f'KW{num}', keyword),
                     (f'kw{num}', keyword),
                     (f'Kw{num}', keyword),
+                    (f'K W{num}', keyword),
+                    (f'K w{num}', keyword),
+                    (f'k w{num}', keyword),
+                    # Patterns with punctuation corruption
+                    (f'__KW{num}__.', keyword),
+                    (f'__KW{num}__,', keyword),
+                    (f'__KW{num}__:', keyword),
+                    (f'__KW{num}__;', keyword),
+                    (f'.KW{num}', keyword),
+                    (f',KW{num}', keyword),
+                    (f':KW{num}', keyword),
+                    (f';KW{num}', keyword),
+                    (f'(KW{num})', keyword),
+                    (f'[KW{num}]', keyword),
+                    (f'"KW{num}"', keyword),
+                    (f"'KW{num}'", keyword),
+                    # Patterns with special characters
+                    (f'--KW{num}--', keyword),
+                    (f'==KW{num}==', keyword),
+                    (f'**KW{num}**', keyword),
+                    (f'##KW{num}##', keyword),
+                    (f'~~KW{num}~~', keyword),
+                    (f'//KW{num}//', keyword),
+                    (f'\\KW{num}\\', keyword),
+                    # Patterns with HTML entities
+                    (f'&lt;KW{num}&gt;', keyword),
+                    (f'&amp;KW{num}&amp;', keyword),
+                    (f'&#95;KW{num}&#95;', keyword),
+                    # Patterns with case variations
+                    (f'__kW{num}__', keyword),
+                    (f'__Kw{num}__', keyword),
+                    (f'__KW{num}__'.lower(), keyword),
+                    (f'__KW{num}__'.upper(), keyword),
+                    # Patterns with extra underscores
                     (f'___KW{num}___', keyword),
                     (f'____KW{num}____', keyword),
+                    (f'_____KW{num}_____', keyword),
                     (f'__KW{num}___', keyword),
                     (f'___KW{num}__', keyword),
+                    # Patterns with missing underscores
+                    (f'_KW{num}', keyword),
+                    (f'KW{num}_', keyword),
+                    (f'__KW{num}', keyword),
+                    (f'KW{num}__', keyword),
+                    # Patterns with dots instead of underscores
+                    (f'..KW{num}..', keyword),
+                    (f'.KW{num}.', keyword),
+                    (f'...KW{num}...', keyword),
+                    # Patterns with hyphens
+                    (f'-KW{num}-', keyword),
+                    (f'--KW{num}', keyword),
+                    (f'KW{num}--', keyword),
+                    # Patterns with spaces in the number
+                    (f'__KW {num}__', keyword),
+                    (f'__KW  {num}__', keyword),
+                    (f'__KW   {num}__', keyword),
+                    # Patterns with partial corruption
+                    (f'__{num}__', keyword),
+                    (f'__K{num}__', keyword),
+                    (f'__W{num}__', keyword),
+                    (f'__KW{num}', keyword),
+                    (f'KW{num}__', keyword),
+                    # Patterns with word boundaries
+                    (f'\\bKW{num}\\b', keyword),
+                    (f'\\b__KW{num}__\\b', keyword),
+                    # Patterns with newlines or tabs
+                    (f'\\nKW{num}\\n', keyword),
+                    (f'\\tKW{num}\\t', keyword),
+                    (f'\\rKW{num}\\r', keyword),
+                    # Patterns with common prefixes/suffixes
+                    (f'theKW{num}', keyword),
+                    (f'KW{num}the', keyword),
+                    (f'aKW{num}', keyword),
+                    (f'KW{num}a', keyword),
+                    (f'andKW{num}', keyword),
+                    (f'KW{num}and', keyword),
+                    (f'ofKW{num}', keyword),
+                    (f'KW{num}of', keyword),
+                    # Patterns with concatenation
+                    (f'KW{num}KW{num}', keyword),
+                    (f'KWKW{num}', keyword),
+                    (f'KW{num}{num}', keyword),
+                    # Patterns with zero-padding variations
+                    (f'__KW{num.zfill(3)}__', keyword),
+                    (f'__KW{num.zfill(4)}__', keyword),
+                    (f'__KW{num.lstrip("0")}__', keyword),
+                    # Patterns with brackets and braces
+                    (f'{{KW{num}}}', keyword),
+                    (f'<KW{num}>', keyword),
+                    (f'</KW{num}>', keyword),
+                    (f'<KW{num}/>', keyword),
+                    # Patterns with quotes variations
+                    (f'`KW{num}`', keyword),
+                    (f'```KW{num}```', keyword),
+                    (f"'''KW{num}'''", keyword),
+                    (f'"""KW{num}"""', keyword),
+                    # Patterns with markdown-style formatting
+                    (f'*KW{num}*', keyword),
+                    (f'_KW{num}_', keyword),
+                    (f'**KW{num}**', keyword),
+                    (f'__KW{num}__', keyword),
+                    (f'***KW{num}***', keyword),
+                    (f'___KW{num}___', keyword),
+                    # Patterns with common typos
+                    (f'__WK{num}__', keyword),
+                    (f'__KV{num}__', keyword),
+                    (f'__KQ{num}__', keyword),
+                    (f'__JW{num}__', keyword),
+                    (f'__LW{num}__', keyword),
+                    (f'__KE{num}__', keyword),
+                    (f'__KR{num}__', keyword),
+                    # Patterns with inserted characters
+                    (f'__K-W{num}__', keyword),
+                    (f'__K_W{num}__', keyword),
+                    (f'__K.W{num}__', keyword),
+                    (f'__K W{num}__', keyword),
+                    (f'__K/W{num}__', keyword),
+                    (f'__K\\W{num}__', keyword),
+                    # Patterns with duplicated parts
+                    (f'____KWKW{num}____', keyword),
+                    (f'__KWKW{num}__', keyword),
+                    (f'__KW{num}{num}__', keyword),
+                    (f'__KW{num}KW{num}__', keyword),
+                    # Patterns with reversed parts
+                    (f'__WK{num}__', keyword),
+                    (f'{num}KW__', keyword),
+                    (f'__{num}KW__', keyword),
+                    # Patterns with common OCR errors
+                    (f'__KVV{num}__', keyword),
+                    (f'__l<W{num}__', keyword),
+                    (f'__l(W{num}__', keyword),
+                    (f'__I<W{num}__', keyword),
+                    # Patterns with unicode variations
+                    (f'＿＿KW{num}＿＿', keyword),
+                    (f'__ＫＷ{num}__', keyword),
+                    (f'——KW{num}——', keyword),
+                    (f'‗‗KW{num}‗‗', keyword),
                 ]
+                # Apply patterns
+                for pattern, replacement in patterns:
+                    if pattern in restored_text:
+                        # Check if this position has already been replaced
+                        start_pos = restored_text.find(pattern)
+                        if start_pos != -1 and not any(pos in replaced_positions for pos in range(start_pos, start_pos + len(pattern))):
+                            print(f"Found pattern '{pattern}', replacing with {replacement}")
+                            restored_text = restored_text.replace(pattern, replacement, 1)
+                            # Mark new positions as replaced
+                            for match in re.finditer(re.escape(replacement), restored_text):
+                                replaced_positions.update(range(match.start(), match.end()))
+                            break
+        # Third pass: Use regex patterns for more complex variations
+        for placeholder, keyword in keyword_map.items():
+            match = re.search(r'__KW(\d+)__', placeholder)
+            if match:
+                num = match.group(1)
+                # Complex regex patterns
+                regex_patterns = [
+                    # Patterns with variable underscores
+                    (r'_{1,5}KW' + num + r'_{1,5}', keyword),
+                    (r'_{0,5}KW' + num + r'_{0,5}', keyword),
+                    # Patterns with any characters between K and W
+                    (r'__K.{0,3}W' + num + r'__', keyword),
+                    # Patterns with spaces and underscores mixed
+                    (r'[\s_]{1,5}KW' + num + r'[\s_]{1,5}', keyword),
+                    # Patterns with case-insensitive matching
+                    (r'(?i)__kw' + num + r'__', keyword),
+                    (r'(?i)kw' + num, keyword),
+                    # Patterns with word boundaries
+                    (r'\b[_]*KW' + num + r'[_]*\b', keyword),
+                    # Patterns with optional characters
+                    (r'_?_?KW' + num + r'_?_?', keyword),
+                    # Patterns with common separators
+                    (r'[-_\.]{0,3}KW' + num + r'[-_\.]{0,3}', keyword),
+                    # Patterns with HTML entities mixed in
+                    (r'&[a-z]+;?KW' + num + r'&[a-z]+;?', keyword),
+                    # Patterns for seriously mangled text
+                    (r'.{0,3}' + num + r'.{0,3}', keyword),  # Just the number with some chars
+                    # Patterns for split placeholders
+                    (r'__\s*KW\s*' + num + r'\s*__', keyword),
+                    (r'_\s*_\s*K\s*W\s*' + num + r'\s*_\s*_', keyword),
+                ]
+                for pattern, replacement in regex_patterns:
+                    matches = list(re.finditer(pattern, restored_text))
+                    for match in matches:
+                        start, end = match.span()
+                        if not any(pos in replaced_positions for pos in range(start, end)):
+                            print(f"Found regex pattern '{pattern}' at position {start}-{end}, replacing with {replacement}")
+                            before = restored_text[:start]
+                            after = restored_text[end:]
+                            restored_text = before + replacement + after
+                            replaced_positions.update(range(start, start + len(replacement)))
+                            break
+        # Fourth pass: Smart underscore replacement
+        # Count underscores and keywords to make intelligent replacements
+        underscore_groups = list(re.finditer(r'_{2,}', restored_text))
+        remaining_keywords = [kw for kw in keyword_map.values() if kw not in restored_text]
+        if underscore_groups and remaining_keywords:
+            print(f"Found {len(underscore_groups)} underscore groups and {len(remaining_keywords)} unused keywords")
+            # Sort underscore groups by length (descending) to prioritize longer ones
+            underscore_groups.sort(key=lambda x: x.end() - x.start(), reverse=True)
+            for i, underscore_match in enumerate(underscore_groups):
+                if i < len(remaining_keywords):
+                    start, end = underscore_match.span()
+                    if not any(pos in replaced_positions for pos in range(start, end)):
+                        keyword = remaining_keywords[i]
+                        before = restored_text[:start]
+                        after = restored_text[end:]
+                        restored_text = before + keyword + after
+                        replaced_positions.update(range(start, start + len(keyword)))
+                        print(f"Replaced underscore group at {start}-{end} with keyword: {keyword}")
+        # Fifth pass: Context-aware replacement
+        # Look for patterns where keywords might make sense
+        for placeholder, keyword in keyword_map.items():
+            if keyword not in restored_text:
+                # Look for sentences or phrases that seem to be missing the keyword
+                # Common patterns where keywords might be missing
+                context_patterns = [
+                    r'the\s+(?:is|are|was|were)\s+',  # "the ___ is"
+                    r'of\s+(?:the\s+)?',  # "of the ___"
+                    r'for\s+(?:the\s+)?',  # "for the ___"
+                    r'in\s+(?:the\s+)?',  # "in the ___"
+                    r'with\s+(?:the\s+)?',  # "with the ___"
+                    r'about\s+(?:the\s+)?',  # "about the ___"
+                    r'using\s+(?:the\s+)?',  # "using the ___"
+                    r'through\s+(?:the\s+)?',  # "through the ___"
+                ]
+                for pattern in context_patterns:
+                    matches = list(re.finditer(pattern + r'([A-Z]{2,}\d+|\b\w{1,3}\b)', restored_text))
+                    for match in matches:
+                        suspicious_word = match.group(1)
+                        # Check if this looks like a mangled placeholder
+                        if re.match(r'^[A-Z]{1,3}\d+$', suspicious_word) or len(suspicious_word) <= 3:
+                            start = match.start(1)
+                            end = match.end(1)
+                            if not any(pos in replaced_positions for pos in range(start, end)):
+                                before = restored_text[:start]
+                                after = restored_text[end:]
+                                restored_text = before + keyword + after
+                                replaced_positions.update(range(start, start + len(keyword)))
+                                print(f"Context-aware replacement: replaced '{suspicious_word}' with '{keyword}'")
+                                break
+        # Final cleanup passes
+        # Remove any remaining placeholder artifacts
+        cleanup_patterns = [
+            (r'``+', ''),  # Remove backticks
+            (r'__+', ' '),  # Replace multiple underscores with space
+            (r'--+', '-'),  # Normalize dashes
+            (r'\s{2,}', ' '),  # Normalize spaces
+            (r'([.,!?])\s*\1+', r'\1'),  # Remove duplicate punctuation
+        ]
+        for pattern, replacement in cleanup_patterns:
+            restored_text = re.sub(pattern, replacement, restored_text)
+        # Ensure proper spacing around keywords
+        for keyword in keyword_map.values():
+            if keyword in restored_text:
+                # Fix spacing issues around the keyword
+                restored_text = re.sub(r'(\w)(' + re.escape(keyword) + r')', r'\1 \2', restored_text)
+                restored_text = re.sub(r'(' + re.escape(keyword) + r')(\w)', r'\1 \2', restored_text)
+                # Remove duplicate spaces
+                restored_text = re.sub(r'\s+', ' ', restored_text)
         # Final verification
+        for placeholder, keyword in keyword_map.items():
+            if keyword not in restored_text:
+                print(f"WARNING: Keyword '{keyword}' still missing from final text!")
+        # Log final result
+        print(f"Final restored text: {restored_text[:100]}...")
+        return restored_text.strip()
     def should_skip_element(self, element, text):
         """Determine if an element should be skipped from paraphrasing"""
         return html_text
     def wrap_keywords_in_paragraphs(self, soup, keywords):
+        """Wrap keywords with <strong> tags inside <p> tags only"""
         if not keywords:
             return
         # Find all paragraph tags
         for p_tag in soup.find_all('p'):
             # Skip paragraphs that are inside special elements
+            # Check if paragraph is inside any of these elements
             skip_parents = ['div.author-intro', 'div.cta-box', 'div.testimonial-card',
                           'div.news-box', 'button', 'a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                           'div.quiz-container', 'div.question-container', 'div.results']
             # Check if this paragraph should be skipped
             should_skip = False
             for parent in p_tag.parents:
+                # Check by class
                 if parent.name == 'div' and parent.get('class'):
                     classes = parent.get('class', [])
                     if isinstance(classes, list):
                         should_skip = True
                         break
+                # Check by tag name
                 if parent.name in ['button', 'a', 'blockquote', 'details', 'summary']:
                     should_skip = True
                     break
             if any(skip_class in p_class_str for skip_class in ['testimonial-card', 'quiz-', 'stat-']):
                 continue
+            # Process only if this is a regular content paragraph
+            # Get all text nodes in this paragraph
+            for text_node in p_tag.find_all(string=True):
+                # Skip if already inside a strong or b tag
+                if text_node.parent.name in ['strong', 'b', 'em', 'i', 'span', 'a']:
+                    continue
+                # Skip if the text node's immediate parent isn't the p tag
+                # (to avoid nested elements)
+                if text_node.parent != p_tag:
+                    continue
+                original_text = str(text_node)
+                # Skip very short text nodes
+                if len(original_text.strip()) < 20:
+                    continue
+                modified_text = original_text
+                # Check each keyword
                 for keyword in keywords:
+                    # Use word boundaries for accurate matching
+                    pattern = r'\b' + re.escape(keyword) + r'\b'
+                    # Find all matches (case-insensitive)
+                    matches = list(re.finditer(pattern, modified_text, flags=re.IGNORECASE))
+                    # Replace from end to beginning to maintain positions
+                    for match in reversed(matches):
+                        start, end = match.span()
+                        matched_text = match.group(0)
+                        # Wrap with strong tag
+                        modified_text = (modified_text[:start] +
+                                       f'<strong>{matched_text}</strong>' +
+                                       modified_text[end:])
+                # If text was modified, replace the text node
+                if modified_text != original_text:
+                    # Parse the modified text to create new nodes
+                    new_soup = BeautifulSoup(modified_text, 'html.parser')
+                    # Replace the text node with the new nodes
+                    for new_node in reversed(new_soup.contents):
+                        text_node.insert_after(new_node)
+                    text_node.extract()
     def add_natural_flow_variations(self, text):
         """Add more natural flow and rhythm variations for Originality AI"""
             result = self.post_process_html(result)
             # Final safety check for any remaining placeholders or underscores
+            if '__KW' in result or re.search(r'_{3,}', result):
+                print("Warning: Found placeholders or multiple underscores in final HTML output")
                 # Attempt to clean them with keywords
+                for i, keyword in enumerate(all_keywords):
+                    result = result.replace(f'__KW{i:03d}__', keyword)
+                    result = re.sub(r'_{3,}', keyword, result, count=1)
             # Restore all script tags
             for idx, script_content in enumerate(preserved_scripts):