import random
import re
from collections import defaultdict

import nltk
import numpy as np
import spacy
from nltk.corpus import wordnet
from nltk.tag import pos_tag
from nltk.tokenize import sent_tokenize, word_tokenize
from transformers import pipeline

# Make sure the NLTK resources used below are available; download any that
# are missing.  (punkt_tab is needed by newer NLTK releases alongside punkt.)
try:
    nltk.data.find('tokenizers/punkt')
    nltk.data.find('taggers/averaged_perceptron_tagger')
    nltk.data.find('corpora/wordnet')
except LookupError as e:
    print(f"NLTK resource error: {e}")
    print("Attempting to download missing resources...")
    nltk.download('punkt')
    nltk.download('punkt_tab')
    nltk.download('averaged_perceptron_tagger')
    nltk.download('wordnet')


class EnhancedTextHumanizer:
    """Rewrite text so it reads more like spontaneous human writing.

    Applies randomized word-, sentence-, and paragraph-level transformations
    (synonyms, contractions, fillers, hedging, discourse markers, optional
    speech errors and regional dialect) modulated by an ``intensity`` knob
    and a personality profile.
    """

    def __init__(self):
        # Load the spaCy model once; every transformation reuses self.nlp.
        self.nlp = spacy.load("en_core_web_sm")

        # Sentiment analysis is optional: without it, emotional variations
        # fall back to the 'neutral' tone.
        try:
            self.sentiment_analyzer = pipeline("sentiment-analysis")
        except Exception:
            print("Warning: Transformers sentiment analysis not available. Emotional variations will be limited.")
            self.sentiment_analyzer = None

        # Sentence-level transformations
        self.sentence_transformations = [
            self.merge_sentences,
            self.split_sentences,
            self.passive_to_active,
            self.active_to_passive,
            self.add_hedging,
            self.add_intensifiers,
            self.add_rhetorical_question,
            self.add_aside_comment,
        ]

        # Word-level transformations
        self.word_transformations = [
            self.contextual_synonym_replacement,
            self.contraction_expansion,
            self.add_filler_words,
            self.informal_substitution,
            self.add_emphatic_repetition,
        ]

        # Paragraph-level transformations
        self.paragraph_transformations = [
            self.add_discourse_markers,
            self.adjust_formality,
            self.add_cohesion_devices,
        ]

        # Filler words grouped by pragmatic function.
        self.filler_words = {
            'hesitation': ["um", "uh", "er", "hmm", "like"],
            'emphasis': ["actually", "literally", "seriously", "honestly", "truly", "really"],
            'hedging': ["maybe", "perhaps", "probably", "possibly", "somewhat", "kinda", "sort of"],
            'clarification': ["I mean", "you know", "what I'm saying", "in other words"],
            'informal': ["basically", "totally", "absolutely", "pretty much", "y'know"],
        }

        # Contractions and their expansions.
        self.contractions = {
            "can't": "cannot", "won't": "will not", "don't": "do not",
            "doesn't": "does not", "I'm": "I am", "you're": "you are",
            "they're": "they are", "we're": "we are", "it's": "it is",
            "that's": "that is", "who's": "who is", "what's": "what is",
            "there's": "there is", "here's": "here is", "he's": "he is",
            "she's": "she is", "I've": "I have", "you've": "you have",
            "we've": "we have", "they've": "they have", "I'll": "I will",
            "you'll": "you will", "he'll": "he will", "she'll": "she will",
            "we'll": "we will", "they'll": "they will", "I'd": "I would",
            "you'd": "you would", "he'd": "he would", "she'd": "she would",
            "we'd": "we would", "they'd": "they would", "let's": "let us",
            "ain't": "am not", "wasn't": "was not", "weren't": "were not",
            "hasn't": "has not", "haven't": "have not",
            "couldn't": "could not", "shouldn't": "should not",
            "wouldn't": "would not", "didn't": "did not", "isn't": "is not",
            "aren't": "are not", "mightn't": "might not",
            "mustn't": "must not", "shan't": "shall not",
            "needn't": "need not",
        }

        # Reverse lookup: expansion -> contraction.
        self.expansions = {v: k for k, v in self.contractions.items()}

        # Common hedging phrases.
        self.hedging_phrases = [
            "I think", "It seems", "It appears", "From what I understand",
            "As far as I know", "In my opinion", "Arguably", "Presumably",
            "It could be that", "It's possible that", "One might say",
            "To some extent", "More or less", "Kind of", "Sort of",
        ]

        # Intensifiers for emotional emphasis.
        self.intensifiers = [
            "very", "extremely", "incredibly", "remarkably", "absolutely",
            "totally", "completely", "utterly", "entirely", "thoroughly",
            "ridiculously", "insanely", "super", "really", "quite",
            "unbelievably", "amazingly", "surprisingly", "exceptionally",
        ]

        # Discourse markers keyed by rhetorical relation.
        self.discourse_markers = {
            'contrast': ["however", "but", "nevertheless", "on the other hand", "conversely", "in contrast", "yet"],
            'addition': ["moreover", "furthermore", "additionally", "also", "besides", "in addition", "plus"],
            'cause_effect': ["therefore", "consequently", "thus", "hence", "as a result", "so", "because of this"],
            'sequence': ["first", "second", "next", "then", "finally", "subsequently", "later"],
            'example': ["for example", "for instance", "specifically", "to illustrate", "such as", "namely"],
            'conclusion': ["in conclusion", "to sum up", "in summary", "overall", "ultimately", "in the end"],
            'emphasis': ["indeed", "certainly", "in fact", "obviously", "clearly", "notably", "significantly"],
        }

        # Formal word -> informal alternatives.
        self.informal_words = {
            "approximately": ["about", "around"],
            "assistance": ["help", "a hand"],
            "attempt": ["try", "shot", "stab"],
            "communicate": ["talk", "chat", "get in touch"],
            "comprehend": ["get", "understand"],
            "concerning": ["about", "on"],
            "consume": ["eat", "drink", "use up"],
            "currently": ["now", "right now"],
            "decrease": ["drop", "cut", "fall"],
            "difficult": ["hard", "tough"],
            "encounter": ["meet", "run into", "bump into"],
            "endeavor": ["try", "take a shot"],
            "excessive": ["too much", "over the top"],
            "expedite": ["speed up", "hurry"],
            "facilitate": ["help", "make easier"],
            "frequently": ["often", "a lot"],
            "fundamental": ["basic", "key"],
            "utilize": ["use"],
            "purchase": ["buy", "get"],
            "sufficient": ["enough"],
            "inquire": ["ask"],
            "obtain": ["get"],
            "require": ["need"],
            "additional": ["more", "extra"],
            "residence": ["home", "place"],
        }

        # Speech-error generators keyed by error type; restarts/repetitions
        # take (prev_word, word), the rest take a single word.
        self.speech_errors = {
            'restarts': self._generate_restart,
            'repetitions': self._generate_repetition,
            'corrections': self._generate_correction,
            'filled_pauses': self._generate_filled_pause,
            'agreement_errors': self._generate_agreement_error,
        }

        # Regional dialect variations (simplified).
        self.regional_variations = {
            'us_south': {
                'you all': "y'all",
                'going to': "gonna",
                'want to': "wanna",
                'did not': "didn't",
                'yes': "yep",
                'no': "nope",
            },
            'british': {
                'apartment': "flat",
                'elevator': "lift",
                'trash': "rubbish",
                'sidewalk': "pavement",
                'vacation': "holiday",
                'soccer': "football",
            },
        }

        # Personality profiles: per-transformation application rates.
        self.personality_profiles = {
            'casual': {
                'contraction_rate': 0.8,
                'informal_rate': 0.7,
                'hedging_rate': 0.3,
                'filler_rate': 0.4,
                'error_rate': 0.1,
            },
            'formal': {
                'contraction_rate': 0.2,
                'informal_rate': 0.1,
                'hedging_rate': 0.5,
                'filler_rate': 0.1,
                'error_rate': 0.05,
            },
            'academic': {
                'contraction_rate': 0.1,
                'informal_rate': 0.05,
                'hedging_rate': 0.6,
                'filler_rate': 0.1,
                'error_rate': 0.02,
            },
            'enthusiastic': {
                'contraction_rate': 0.6,
                'informal_rate': 0.5,
                'hedging_rate': 0.2,
                'filler_rate': 0.3,
                'error_rate': 0.1,
                'intensifier_rate': 0.7,
            },
        }

        # Emotional expression templates; str.format ignores unused keys, so
        # each template may use {topic}, {sentence}, or both.
        self.emotional_expressions = {
            'positive': [
                "I'm so happy about {topic}!",
                "This is amazing: {sentence}",
                "I love how {sentence}",
                "Wow, {sentence} That's incredible!",
                "I'm really excited about {topic}.",
            ],
            'negative': [
                "I'm not too thrilled about {topic}.",
                "Unfortunately, {sentence}",
                "I'm concerned that {sentence}",
                "This is disappointing: {sentence}",
                "I'm a bit worried about {topic}.",
            ],
            'neutral': [
                "In my view, {sentence}",
                "I think {sentence}",
                "From what I understand, {sentence}",
                "My take on {topic} is that {sentence}",
                "When it comes to {topic}, {sentence}",
            ],
        }

    def humanize_text(self, text, intensity=0.5, personality='casual',
                      add_errors=True, regional_dialect=None,
                      emotional_tone=None):
        """
        Enhanced main function to humanize text with multiple parameters for
        customization.

        Args:
            text (str): The input text to humanize
            intensity (float): Controls how much the text is transformed (0.0 to 1.0)
            personality (str): Personality profile to use ('casual', 'formal', 'academic', 'enthusiastic')
            add_errors (bool): Whether to add realistic speech/typing errors
            regional_dialect (str): Regional dialect to incorporate (None, 'us_south', 'british')
            emotional_tone (str): Overall emotional tone (None, 'positive', 'negative', 'neutral')

        Returns:
            str: Humanized text

        Raises:
            ValueError: If ``intensity`` is outside [0.0, 1.0].
        """
        if intensity < 0 or intensity > 1:
            raise ValueError("Intensity must be between 0.0 and 1.0")

        # Unknown personality names silently fall back to 'casual'.
        profile = self.personality_profiles.get(personality, self.personality_profiles['casual'])

        # Split text into paragraphs (blank lines separate paragraphs).
        paragraphs = [p.strip() for p in text.split('\n') if p.strip()]

        transformed_paragraphs = []
        for para in paragraphs:
            # Only analyze sentiment when no explicit tone was requested.
            sentiment = self._analyze_sentiment(para) if emotional_tone is None and self.sentiment_analyzer else None
            current_tone = emotional_tone or (sentiment['label'].lower() if sentiment else 'neutral')

            # Occasionally wrap the paragraph in an emotional expression.
            if random.random() < intensity * 0.3 and current_tone in self.emotional_expressions:
                topic = self._extract_topic(para)
                expression = random.choice(self.emotional_expressions[current_tone])
                para = expression.format(topic=topic, sentence=para.lower() if para[0].isupper() else para)

            # Apply paragraph-level transformations.
            for transform in self.paragraph_transformations:
                if random.random() < intensity * 0.4:
                    para = transform(para, profile)

            # Sentence-level transformations.
            sentences = sent_tokenize(para)
            transformed_sentences = self._apply_sentence_transformations(sentences, intensity, profile)

            # Apply regional dialect substitutions if requested.
            if regional_dialect and regional_dialect in self.regional_variations:
                transformed_para = " ".join(transformed_sentences)
                for original, variant in self.regional_variations[regional_dialect].items():
                    # Word boundaries avoid partial-word replacements.
                    pattern = r'\b' + re.escape(original) + r'\b'
                    if random.random() < intensity * 0.7:
                        transformed_para = re.sub(pattern, variant, transformed_para, flags=re.IGNORECASE)
                transformed_paragraphs.append(transformed_para)
            else:
                transformed_paragraphs.append(" ".join(transformed_sentences))

        result = "\n\n".join(transformed_paragraphs)

        # Word-level transformations run over the whole joined text.
        result = self._apply_word_transformations(result, intensity, profile)

        # Speech errors only kick in above a minimum intensity.
        if add_errors and intensity > 0.2:
            result = self._introduce_speech_errors(result, intensity * profile.get('error_rate', 0.1))

        # Final cleanup of spacing around punctuation.
        result = self._normalize_spacing(result)
        return result

    def _analyze_sentiment(self, text):
        """Analyze sentiment of the text using the sentiment analyzer.

        Returns the first pipeline result dict, or None when the analyzer is
        unavailable or fails.
        """
        if self.sentiment_analyzer:
            try:
                return self.sentiment_analyzer(text)[0]
            except Exception:
                pass
        return None

    def _extract_topic(self, text):
        """Extract a potential topic from the text using spaCy.

        Preference order: first named entity, first noun chunk, first word of
        the first sentence, then the literal fallback "this".
        """
        doc = self.nlp(text)
        entities = list(doc.ents)
        if entities:
            return entities[0].text
        chunks = list(doc.noun_chunks)
        if chunks:
            return chunks[0].text
        sentences = sent_tokenize(text)
        if sentences:
            words = word_tokenize(sentences[0])
            if words:
                return words[0]
        return "this"

    def _apply_sentence_transformations(self, sentences, intensity, profile):
        """Apply sentence-level transformations weighted by the personality profile."""
        result = []
        i = 0
        while i < len(sentences):
            if random.random() < intensity * 0.7:
                # Weights are ordered to match self.sentence_transformations.
                weights = [
                    1.0,  # merge_sentences
                    0.8,  # split_sentences
                    0.5 if profile.get('hedging_rate', 0.3) > 0.4 else 0.2,  # passive_to_active
                    0.2 if profile.get('hedging_rate', 0.3) > 0.4 else 0.5,  # active_to_passive
                    profile.get('hedging_rate', 0.3),  # add_hedging
                    profile.get('intensifier_rate', 0.4),  # add_intensifiers
                    0.3 if profile.get('informal_rate', 0.5) > 0.5 else 0.1,  # add_rhetorical_question
                    0.4 if profile.get('informal_rate', 0.5) > 0.4 else 0.2,  # add_aside_comment
                ]
                # Normalize weights (sum hoisted out of the comprehension).
                total = sum(weights)
                weights = [w / total for w in weights]

                transformation = random.choices(self.sentence_transformations, weights=weights)[0]

                if transformation in [self.merge_sentences] and i < len(sentences) - 1:
                    # merge_sentences consumes two consecutive sentences.
                    result.append(transformation(sentences[i], sentences[i + 1]))
                    i += 2
                else:
                    # All other transformations take one sentence (second
                    # argument is an ignored placeholder).
                    result.append(transformation(sentences[i], ""))
                    i += 1
            else:
                result.append(sentences[i])
                i += 1
        return result

    def _apply_word_transformations(self, text, intensity, profile):
        """Apply word-level transformations weighted by the personality profile."""
        doc = self.nlp(text)
        # Work sentence by sentence to preserve sentence structure.
        sentences = [sent.text for sent in doc.sents]
        transformed_sentences = []

        for sentence in sentences:
            sentence_doc = self.nlp(sentence)
            new_tokens = []
            i = 0
            while i < len(sentence_doc):
                token = sentence_doc[i]
                # Punctuation passes through untouched.
                if token.is_punct:
                    new_tokens.append(token.text)
                    i += 1
                    continue

                if random.random() < intensity * 0.5:
                    # Weights are ordered to match self.word_transformations.
                    weights = [
                        0.6,  # contextual_synonym_replacement
                        profile.get('contraction_rate', 0.5),  # contraction_expansion
                        profile.get('filler_rate', 0.3),  # add_filler_words
                        profile.get('informal_rate', 0.5),  # informal_substitution
                        0.3 if profile.get('intensifier_rate', 0.4) > 0.5 else 0.1,  # add_emphatic_repetition
                    ]
                    total = sum(weights)
                    weights = [w / total for w in weights]

                    transformation = random.choices(self.word_transformations, weights=weights)[0]

                    if transformation == self.contextual_synonym_replacement:
                        transformed = transformation(token, sentence_doc)
                    elif transformation == self.contraction_expansion:
                        # Multi-word expansions ("will not" -> "won't") span
                        # two tokens, so test the two-token window against the
                        # expansion table before falling back to the
                        # single-token toggle.
                        two_word = None
                        if i < len(sentence_doc) - 1:
                            two_word = f"{token.text.lower()} {sentence_doc[i + 1].text.lower()}"
                        if two_word and two_word in self.expansions:
                            transformed = self.expansions[two_word]
                            i += 1  # Skip the second word of the expansion.
                        else:
                            transformed = transformation(token)
                    elif transformation == self.add_filler_words:
                        # Only occasionally prepend a filler word.
                        if random.random() < 0.3:
                            filler_category = random.choice(list(self.filler_words.keys()))
                            filler = random.choice(self.filler_words[filler_category])
                            new_tokens.append(filler)
                        transformed = token.text
                    elif transformation == self.informal_substitution:
                        transformed = transformation(token)
                    elif transformation == self.add_emphatic_repetition:
                        transformed = transformation(token)
                    else:
                        transformed = token.text

                    new_tokens.append(transformed)
                else:
                    new_tokens.append(token.text)
                i += 1

            transformed_sentences.append(self._reconstruct_sentence(new_tokens))

        return " ".join(transformed_sentences)

    def _reconstruct_sentence(self, tokens):
        """Reconstruct a sentence from tokens, preserving proper spacing."""
        result = ""
        for i, token in enumerate(tokens):
            # No space before closing punctuation.
            if token in ".,!?;:)]}" and result:
                result = result.rstrip() + token + " "
            # No space after opening brackets.
            elif i > 0 and tokens[i - 1] in "([{" and result:
                result = result.rstrip() + token + " "
            elif token in "([{" and result:
                result = result.rstrip() + token
            # Quotes attach to the preceding word.
            elif token in ['\'', '"'] and result and result[-1] != " ":
                result += token + " "
            else:
                result += token + " "
        return result.strip()

    def _introduce_speech_errors(self, text, error_rate):
        """Introduce realistic speech/typing errors at the given rate."""
        words = text.split()
        result = []
        for i, word in enumerate(words):
            if len(word) > 2 and random.random() < error_rate:
                error_type = random.choice(list(self.speech_errors.keys()))
                error_func = self.speech_errors[error_type]
                if error_type in ['restarts', 'repetitions'] and i > 0:
                    # These errors rewrite the previous word as well.
                    modified = error_func(words[i - 1], word)
                    result.pop()  # Remove the previous word; it is re-emitted.
                    result.append(modified)
                else:
                    result.append(error_func(word))
            else:
                result.append(word)
        return " ".join(result)

    def _normalize_spacing(self, text):
        """Fix spacing around punctuation for a more natural look."""
        text = re.sub(r'\s+([.,;:!?)])', r'\1', text)
        text = re.sub(r'([(])\s+', r'\1', text)
        # Collapse runs of whitespace.
        text = re.sub(r'\s{2,}', ' ', text)
        return text

    # Speech error generators

    def _generate_restart(self, prev_word, word):
        """Generate a restart error (e.g., "I was- I was saying")."""
        return f"{prev_word}- {prev_word} {word}"

    def _generate_repetition(self, prev_word, word):
        """Generate a word repetition (e.g., "the the cat")."""
        return f"{prev_word} {prev_word} {word}"

    def _generate_correction(self, word):
        """Generate a self-correction (e.g., "teh the")."""
        if len(word) < 4:
            return word
        # Create a simple typo by swapping two adjacent characters.
        i = random.randint(0, len(word) - 2)
        typo = word[:i] + word[i + 1] + word[i] + word[i + 2:]
        correction_style = random.choice(["asterisk", "dash", "explicit"])
        if correction_style == "asterisk":
            return f"{typo}*{word}*"
        elif correction_style == "dash":
            return f"{typo}-{word}"
        else:
            return f"{typo}, I mean {word}"

    def _generate_filled_pause(self, word):
        """Generate a filled pause (e.g., "um, like")."""
        filler = random.choice(self.filler_words['hesitation'])
        return f"{filler}, {word}"

    def _generate_agreement_error(self, word):
        """Generate a subject-verb agreement error (simplified)."""
        if word.endswith('s') and len(word) > 3:
            return word[:-1]
        elif not word.endswith('s') and random.random() < 0.5:
            return word + 's'
        return word

    # Enhanced sentence-level transformations

    def merge_sentences(self, sent1, sent2):
        """Merge two sentences with a conjunction chosen by semantic similarity."""
        if sent1.endswith('.'):
            sent1 = sent1[:-1]

        doc1 = self.nlp(sent1)
        doc2 = self.nlp(sent2)

        # Similarity drives the conjunction choice: similar content uses
        # additive conjunctions, dissimilar content uses contrastive ones.
        similarity = doc1.similarity(doc2)
        if similarity > 0.7:
            conjunction = random.choice(["and", "also", "moreover", "furthermore"])
        elif similarity < 0.3:
            conjunction = random.choice(["but", "however", "on the other hand", "yet"])
        else:
            conjunction = random.choice(["and", "while", "so", "because", "although"])

        return f"{sent1} {conjunction} {sent2.lower() if sent2 and sent2[0].isupper() else sent2}"

    def split_sentences(self, sent, _):
        """Split a longer sentence into two with improved linguistic awareness."""
        doc = self.nlp(sent)
        tokens = [token for token in doc]

        # Only split if the sentence is long enough.
        if len(tokens) < 8:
            return sent

        # Score candidate split points from the dependency structure.
        potential_splits = []
        for i, token in enumerate(tokens):
            # Good split points are often after conjunctions or punctuation.
            if (token.dep_ in ['cc', 'prep', 'mark'] or token.pos_ == 'PUNCT') and 3 < i < len(tokens) - 3:
                potential_splits.append((i, 1))  # Higher weight for these.
            # Or before a new clause.
            elif token.dep_ in ['nsubj', 'nsubjpass'] and i > 3:
                potential_splits.append((i, 0.8))

        if not potential_splits:
            split_point = len(tokens) // 2  # Fallback to the middle.
        else:
            points, weights = zip(*potential_splits)
            split_point = random.choices(points, weights=weights)[0]

        # text_with_ws preserves the original intra-sentence spacing.
        sent1 = "".join([t.text_with_ws for t in tokens[:split_point]])
        sent2 = "".join([t.text_with_ws for t in tokens[split_point:]])

        # Ensure proper capitalization and punctuation.
        sent1 = sent1.rstrip()
        if not sent1.endswith(('.', '!', '?')):
            sent1 += '.'
        sent2 = sent2.strip()
        if sent2 and sent2[0].islower():
            sent2 = sent2[0].upper() + sent2[1:]

        return f"{sent1} {sent2}"

    def passive_to_active(self, sent, _):
        """Convert passive voice to active voice using spaCy's dependency parsing.

        Returns the sentence unchanged when no convertible passive
        construction (with an explicit "by" agent) is found.
        """
        doc = self.nlp(sent)
        for token in doc:
            if token.dep_ == "nsubjpass":
                subject = token
                agent = None
                verb = token.head
                # Find the agent (often introduced by "by").
                for child in doc:
                    if child.dep_ == "agent" and child.head == verb:
                        for grandchild in child.children:
                            if grandchild.dep_ in ["pobj", "nmod"]:
                                agent = grandchild
                                break
                if agent:
                    subj_span = doc[subject.left_edge.i:subject.right_edge.i + 1].text
                    verb_span = doc[verb.i:verb.i + 1].text
                    agent_span = doc[agent.left_edge.i:agent.right_edge.i + 1].text
                    # Naive de-passivization: drop the auxiliary.
                    active_verb = verb_span.replace("was ", "").replace("were ", "")
                    if sent.endswith('.'):
                        new_sent = f"{agent_span} {active_verb} {subj_span}."
                    else:
                        new_sent = f"{agent_span} {active_verb} {subj_span}"
                    return new_sent
        return sent

    def active_to_passive(self, sent, _):
        """Convert active voice to passive voice using spaCy's dependency parsing.

        Returns the sentence unchanged when no subject-verb-object
        construction is found.
        """
        doc = self.nlp(sent)
        for token in doc:
            if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
                subject = token
                verb = token.head
                obj = None
                for child in verb.children:
                    if child.dep_ in ["dobj", "obj"]:
                        obj = child
                        break
                if obj:
                    subj_span = doc[subject.left_edge.i:subject.right_edge.i + 1].text
                    verb_span = doc[verb.i:verb.i + 1].text
                    obj_span = doc[obj.left_edge.i:obj.right_edge.i + 1].text
                    # Naive passive verb form: strip 3rd-person -s, add -ed.
                    passive_verb = verb_span
                    if verb_span.endswith("s"):
                        passive_verb = passive_verb[:-1]
                    if sent.endswith('.'):
                        new_sent = f"{obj_span} was {passive_verb}ed by {subj_span}."
                    else:
                        new_sent = f"{obj_span} was {passive_verb}ed by {subj_span}"
                    return new_sent
        return sent

    def add_hedging(self, sent, _):
        """Add hedging language to a statement."""
        hedging = random.choice(self.hedging_phrases)
        # Questions get the hedge appended before the question mark.
        if sent.endswith('?'):
            return f"{sent[:-1]}, {hedging.lower()}?"
        # Statements get the hedge prepended, lowering the original opener.
        if sent[0].isupper():
            return f"{hedging}, {sent[0].lower() + sent[1:]}"
        return f"{hedging}, {sent}"

    def add_intensifiers(self, sent, _):
        """Add intensifiers to adjectives and adverbs.

        NOTE(review): rejoining with single spaces detaches punctuation;
        _normalize_spacing later re-attaches most of it.
        """
        doc = self.nlp(sent)
        words = list(doc)
        result = []
        for i, token in enumerate(words):
            if token.pos_ in ["ADJ", "ADV"] and random.random() < 0.6:
                result.append(random.choice(self.intensifiers))
            result.append(token.text)
        return " ".join(result)

    def add_rhetorical_question(self, sent, _):
        """Add a rhetorical question related to the statement."""
        doc = self.nlp(sent)
        subjects = [tok for tok in doc if tok.dep_ in ["nsubj", "nsubjpass"]]
        if subjects and random.random() < 0.7:
            subject = subjects[0].text
            templates = [
                f"Isn't that interesting about {subject}?",
                f"Don't you think so?",
                f"Right?",
                f"You know what I mean?",
                f"Can you imagine?",
                f"Who would have thought?",
                f"Why is that so important?",
            ]
            return f"{sent} {random.choice(templates)}"
        return sent

    def add_aside_comment(self, sent, _):
        """Add a parenthetical aside or comment."""
        doc = self.nlp(sent)
        words = [token.text for token in doc]

        # Insert mid-sentence when long enough, otherwise append at the end.
        if len(words) > 5:
            position = random.randint(3, len(words) - 2)
        else:
            position = len(words)

        asides = [
            "by the way", "if you ask me", "I think", "you know",
            "to be honest", "believe it or not", "interestingly",
            "surprisingly", "and this is important",
        ]
        aside = random.choice(asides)

        if position < len(words):
            # Insert in the middle, set off with commas.
            words.insert(position, f", {aside},")
        else:
            if sent.endswith('.'):
                words[-1] = words[-1][:-1]  # Remove the period.
                words.append(f", {aside}.")
            else:
                words.append(f", {aside}")

        return " ".join(words)

    # Word-level transformations

    def contextual_synonym_replacement(self, token, doc):
        """Replace a word with a contextually appropriate WordNet synonym.

        Candidate synonyms are re-scored by document similarity after
        substitution; only near-equivalent rewrites (similarity > 0.8) are
        kept.
        """
        # Only replace content words.
        if token.pos_ not in ["NOUN", "VERB", "ADJ", "ADV"] or token.is_stop:
            return token.text

        synonyms = []
        for syn in wordnet.synsets(token.text):
            for lemma in syn.lemmas():
                synonym = lemma.name().replace('_', ' ')
                if synonym != token.text and synonym not in synonyms:
                    synonyms.append(synonym)

        if not synonyms:
            return token.text

        # Check up to 5 candidates for efficiency.  NOTE(review): str.replace
        # substitutes every occurrence of the token text, not just this token.
        filtered_synonyms = []
        for synonym in synonyms[:5]:
            new_text = doc.text.replace(token.text, synonym)
            new_doc = self.nlp(new_text)
            similarity = doc.similarity(new_doc)
            if similarity > 0.8:
                filtered_synonyms.append((synonym, similarity))

        if not filtered_synonyms:
            return token.text

        # Pick randomly among the top-3 most similar candidates.
        filtered_synonyms.sort(key=lambda x: x[1], reverse=True)
        return random.choice(filtered_synonyms[:3])[0]

    def contraction_expansion(self, token):
        """Toggle between contractions and their expansions."""
        if token.text.lower() in self.contractions:
            return self.contractions[token.text.lower()]
        elif token.text.lower() in self.expansions:
            return self.expansions[token.text.lower()]
        return token.text

    def add_filler_words(self, token):
        """Prepend a filler word appropriate to the token's part of speech."""
        if token.pos_ == "ADJ":
            filler_category = random.choice(["emphasis", "hedging"])
        elif token.pos_ == "VERB":
            filler_category = random.choice(["hesitation", "emphasis"])
        elif token.pos_ == "NOUN":
            filler_category = random.choice(["clarification", "informal"])
        else:
            filler_category = random.choice(list(self.filler_words.keys()))
        filler = random.choice(self.filler_words[filler_category])
        return f"{filler} {token.text}"

    def informal_substitution(self, token):
        """Replace formal words with informal alternatives."""
        if token.text.lower() in self.informal_words:
            return random.choice(self.informal_words[token.text.lower()])
        return token.text

    def add_emphatic_repetition(self, token):
        """Add emphatic repetition for emphasis (adjectives/adverbs only)."""
        if token.pos_ in ["ADJ", "ADV"] and len(token.text) > 2:
            style = random.choice(["hyphen", "comma", "simple"])
            if style == "hyphen":
                return f"{token.text}-{token.text}"
            elif style == "comma":
                return f"{token.text}, {token.text}"
            else:
                return f"{token.text} {token.text}"
        return token.text

    # Paragraph-level transformations

    def add_discourse_markers(self, paragraph, profile):
        """Add discourse markers to enhance cohesion between sentences."""
        sentences = sent_tokenize(paragraph)
        if len(sentences) <= 1:
            return paragraph

        marker_types = list(self.discourse_markers.keys())
        weighted_types = random.choices(
            marker_types,
            weights=[0.2, 0.2, 0.2, 0.15, 0.1, 0.1, 0.05],
            k=min(len(sentences) - 1, 3),  # Don't add too many markers.
        )

        # Mark about half of the non-initial sentences.
        num_markers = min(len(sentences) - 1, max(1, int(len(sentences) * 0.5)))
        positions = sorted(random.sample(range(1, len(sentences)), num_markers))

        for i, pos in enumerate(positions):
            marker_type = weighted_types[i % len(weighted_types)]
            marker = random.choice(self.discourse_markers[marker_type])
            # Prepend the marker, lowering the sentence's original opener.
            sentences[pos] = f"{marker}, {sentences[pos][0].lower() + sentences[pos][1:]}"

        return " ".join(sentences)

    def adjust_formality(self, paragraph, profile):
        """Adjust the overall formality of the paragraph per the profile."""
        formality_level = profile.get('informal_rate', 0.5)

        # Low informality -> formalize.
        if formality_level < 0.3:
            for contraction, expansion in self.contractions.items():
                pattern = r'\b' + re.escape(contraction) + r'\b'
                paragraph = re.sub(pattern, expansion, paragraph, flags=re.IGNORECASE)
            informal_phrases = ["you know", "like", "kinda", "sort of", "pretty much"]
            for phrase in informal_phrases:
                paragraph = re.sub(r'\b' + re.escape(phrase) + r'\b', '', paragraph, flags=re.IGNORECASE)

        # High informality -> casualize.
        elif formality_level > 0.7:
            for formal, informals in self.informal_words.items():
                pattern = r'\b' + re.escape(formal) + r'\b'
                if random.random() < 0.7:
                    replacement = random.choice(informals)
                    paragraph = re.sub(pattern, replacement, paragraph, flags=re.IGNORECASE)
            for expansion, contraction in self.expansions.items():
                if ' ' in expansion:  # Only multi-word expansions.
                    pattern = r'\b' + re.escape(expansion) + r'\b'
                    paragraph = re.sub(pattern, contraction, paragraph, flags=re.IGNORECASE)

        return paragraph

    def add_cohesion_devices(self, paragraph, profile):
        """Add cohesion devices like pronouns and references."""
        sentences = sent_tokenize(paragraph)
        if len(sentences) <= 1:
            return paragraph

        doc = self.nlp(paragraph)

        # Collect key entities.
        entities = {}
        for ent in doc.ents:
            if ent.label_ in ["PERSON", "ORG", "GPE", "LOC", "PRODUCT"]:
                if ent.text not in entities:
                    entities[ent.text] = []
                entities[ent.text].append(ent.label_)

        # Collect key nouns.
        nouns = [token.text for token in doc if token.pos_ == "NOUN" and len(token.text) > 3]

        if not entities and not nouns:
            return paragraph

        # Prefer entities over plain nouns as the reference subject.
        reference_subject = None
        if entities:
            reference_subject = random.choice(list(entities.keys()))
        elif nouns:
            reference_subject = random.choice(nouns)
        if not reference_subject:
            return paragraph

        ref_type = random.choice(["pronoun", "determiner", "repetition"])

        # "repetition" intentionally leaves the text as-is.
        if ref_type == "pronoun":
            # Simple pronoun substitution (could be improved with gender recognition).
            pronouns = ["it", "they", "this", "these", "that", "those"]
            pronoun = random.choice(pronouns)
            for i, sent in enumerate(sentences):
                if reference_subject in sent and i < len(sentences) - 1:
                    next_sent = sentences[i + 1]
                    if reference_subject in next_sent:
                        sentences[i + 1] = next_sent.replace(reference_subject, pronoun, 1)
                    break
        elif ref_type == "determiner":
            determiners = ["this", "that", "these", "those", "the", "such a"]
            determiner = random.choice(determiners)
            for i, sent in enumerate(sentences):
                if reference_subject in sent and i < len(sentences) - 1:
                    sentences[i + 1] = sentences[i + 1].replace(
                        reference_subject,
                        f"{determiner} {reference_subject}",
                        1,
                    )
                    break

        return " ".join(sentences)


# Example usage
if __name__ == "__main__":
    humanizer = EnhancedTextHumanizer()

    original_text = """
    Artificial intelligence has significantly impacted numerous industries. It has improved efficiency in manufacturing through automation. The healthcare sector has benefited from better diagnostic tools. Machine learning algorithms continue to advance and provide new solutions. Companies invest heavily in AI research and development.
    """

    # Test different personality types.
    for personality in ['casual', 'formal', 'academic', 'enthusiastic']:
        print(f"\n--- {personality.upper()} PERSONALITY ---")
        humanized = humanizer.humanize_text(
            original_text,
            intensity=0.7,
            personality=personality,
        )
        print(humanized)

    # Test with regional dialect.
    print("\n--- REGIONAL DIALECT (US SOUTH) ---")
    humanized = humanizer.humanize_text(
        original_text,
        intensity=0.7,
        personality='casual',
        regional_dialect='us_south',
    )
    print(humanized)

    # Test with emotional tone.
    print("\n--- EMOTIONAL TONE (POSITIVE) ---")
    humanized = humanizer.humanize_text(
        original_text,
        intensity=0.7,
        personality='enthusiastic',
        emotional_tone='positive',
    )
    print(humanized)