# Humanizer-try / enhanced_text_humanizer.py
# Author: VarunRavichander — commit 6fd1367 ("Update enhanced_text_humanizer.py")
import nltk
import random
import re
import spacy
import numpy as np
from nltk.corpus import wordnet
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tag import pos_tag
from collections import defaultdict
from transformers import pipeline
# Ensure the NLTK data packages used below are present: punkt/punkt_tab back
# sent_tokenize/word_tokenize, the tagger backs pos_tag, wordnet backs the
# synonym lookup.  (A redundant module-level spacy.load() whose result was
# discarded has been removed; the model is loaded once in __init__.)
try:
    nltk.data.find('tokenizers/punkt')
    nltk.data.find('taggers/averaged_perceptron_tagger')
    nltk.data.find('corpora/wordnet')
except LookupError as e:
    print(f"NLTK resource error: {e}")
    print("Attempting to download missing resources...")
    nltk.download('punkt')
    nltk.download('punkt_tab')
    nltk.download('averaged_perceptron_tagger')
    nltk.download('wordnet')
class EnhancedTextHumanizer:
    """Rule-based text "humanizer".

    Applies randomized word-, sentence-, and paragraph-level rewrites
    (synonym swaps, contractions, fillers, discourse markers, dialect
    substitutions, simulated speech errors) to make machine-sounding text
    read more naturally.
    """

    def __init__(self):
        """Load NLP models and build the static lookup tables the transformations use."""
        # spaCy pipeline used for tagging, dependency parsing, and similarity.
        self.nlp = spacy.load("en_core_web_sm")
        # Optional transformers sentiment pipeline; tone detection degrades
        # gracefully to 'neutral' when the model cannot be loaded.
        try:
            self.sentiment_analyzer = pipeline("sentiment-analysis")
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are not swallowed during startup.
            print("Warning: Transformers sentiment analysis not available. Emotional variations will be limited.")
            self.sentiment_analyzer = None
        # Sentence-level transformations (each takes (sent1, sent2) — the
        # second argument is "" for single-sentence transforms).
        self.sentence_transformations = [
            self.merge_sentences,
            self.split_sentences,
            self.passive_to_active,
            self.active_to_passive,
            self.add_hedging,
            self.add_intensifiers,
            self.add_rhetorical_question,
            self.add_aside_comment
        ]
        # Word-level transformations (applied per spaCy token).
        self.word_transformations = [
            self.contextual_synonym_replacement,
            self.contraction_expansion,
            self.add_filler_words,
            self.informal_substitution,
            self.add_emphatic_repetition
        ]
        # Paragraph-level transformations (each takes (paragraph, profile)).
        self.paragraph_transformations = [
            self.add_discourse_markers,
            self.adjust_formality,
            self.add_cohesion_devices
        ]
        # Filler words grouped by conversational function.
        self.filler_words = {
            'hesitation': ["um", "uh", "er", "hmm", "like"],
            'emphasis': ["actually", "literally", "seriously", "honestly", "truly", "really"],
            'hedging': ["maybe", "perhaps", "probably", "possibly", "somewhat", "kinda", "sort of"],
            'clarification': ["I mean", "you know", "what I'm saying", "in other words"],
            'informal': ["basically", "totally", "absolutely", "pretty much", "y'know"]
        }
        # Contractions and their expansions.
        self.contractions = {
            "can't": "cannot", "won't": "will not", "don't": "do not",
            "doesn't": "does not", "I'm": "I am", "you're": "you are",
            "they're": "they are", "we're": "we are", "it's": "it is",
            "that's": "that is", "who's": "who is", "what's": "what is",
            "there's": "there is", "here's": "here is", "he's": "he is",
            "she's": "she is", "I've": "I have", "you've": "you have",
            "we've": "we have", "they've": "they have", "I'll": "I will",
            "you'll": "you will", "he'll": "he will", "she'll": "she will",
            "we'll": "we will", "they'll": "they will", "I'd": "I would",
            "you'd": "you would", "he'd": "he would", "she'd": "she would",
            "we'd": "we would", "they'd": "they would", "let's": "let us",
            "ain't": "am not", "wasn't": "was not", "weren't": "were not",
            "hasn't": "has not", "haven't": "have not", "couldn't": "could not",
            "shouldn't": "should not", "wouldn't": "would not", "didn't": "did not",
            "isn't": "is not", "aren't": "are not", "mightn't": "might not",
            "mustn't": "must not", "shan't": "shall not", "needn't": "need not"
        }
        # Reverse mapping: expansion -> contraction.
        self.expansions = {v: k for k, v in self.contractions.items()}
        # Common hedging phrases, prepended (or appended to questions).
        self.hedging_phrases = [
            "I think", "It seems", "It appears", "From what I understand",
            "As far as I know", "In my opinion", "Arguably", "Presumably",
            "It could be that", "It's possible that", "One might say",
            "To some extent", "More or less", "Kind of", "Sort of"
        ]
        # Intensifiers placed before adjectives/adverbs.
        self.intensifiers = [
            "very", "extremely", "incredibly", "remarkably", "absolutely",
            "totally", "completely", "utterly", "entirely", "thoroughly",
            "ridiculously", "insanely", "super", "really", "quite",
            "unbelievably", "amazingly", "surprisingly", "exceptionally"
        ]
        # Discourse markers for sentence transitions, grouped by function.
        self.discourse_markers = {
            'contrast': ["however", "but", "nevertheless", "on the other hand", "conversely", "in contrast", "yet"],
            'addition': ["moreover", "furthermore", "additionally", "also", "besides", "in addition", "plus"],
            'cause_effect': ["therefore", "consequently", "thus", "hence", "as a result", "so", "because of this"],
            'sequence': ["first", "second", "next", "then", "finally", "subsequently", "later"],
            'example': ["for example", "for instance", "specifically", "to illustrate", "such as", "namely"],
            'conclusion': ["in conclusion", "to sum up", "in summary", "overall", "ultimately", "in the end"],
            'emphasis': ["indeed", "certainly", "in fact", "obviously", "clearly", "notably", "significantly"]
        }
        # Formal word -> informal alternatives.
        self.informal_words = {
            "approximately": ["about", "around"],
            "assistance": ["help", "a hand"],
            "attempt": ["try", "shot", "stab"],
            "communicate": ["talk", "chat", "get in touch"],
            "comprehend": ["get", "understand"],
            "concerning": ["about", "on"],
            "consume": ["eat", "drink", "use up"],
            "currently": ["now", "right now"],
            "decrease": ["drop", "cut", "fall"],
            "difficult": ["hard", "tough"],
            "encounter": ["meet", "run into", "bump into"],
            "endeavor": ["try", "take a shot"],
            "excessive": ["too much", "over the top"],
            "expedite": ["speed up", "hurry"],
            "facilitate": ["help", "make easier"],
            "frequently": ["often", "a lot"],
            "fundamental": ["basic", "key"],
            "utilize": ["use"],
            "purchase": ["buy", "get"],
            "sufficient": ["enough"],
            "inquire": ["ask"],
            "obtain": ["get"],
            "require": ["need"],
            "additional": ["more", "extra"],
            "residence": ["home", "place"]
        }
        # Speech error generators keyed by error type (see _introduce_speech_errors).
        self.speech_errors = {
            'restarts': self._generate_restart,
            'repetitions': self._generate_repetition,
            'corrections': self._generate_correction,
            'filled_pauses': self._generate_filled_pause,
            'agreement_errors': self._generate_agreement_error
        }
        # Regional dialect variations (simplified word/phrase swaps).
        self.regional_variations = {
            'us_south': {
                'you all': "y'all",
                'going to': "gonna",
                'want to': "wanna",
                'did not': "didn't",
                'yes': "yep",
                'no': "nope"
            },
            'british': {
                'apartment': "flat",
                'elevator': "lift",
                'trash': "rubbish",
                'sidewalk': "pavement",
                'vacation': "holiday",
                'soccer': "football"
            }
        }
        # Personality profiles: per-transformation probability knobs.
        self.personality_profiles = {
            'casual': {
                'contraction_rate': 0.8,
                'informal_rate': 0.7,
                'hedging_rate': 0.3,
                'filler_rate': 0.4,
                'error_rate': 0.1
            },
            'formal': {
                'contraction_rate': 0.2,
                'informal_rate': 0.1,
                'hedging_rate': 0.5,
                'filler_rate': 0.1,
                'error_rate': 0.05
            },
            'academic': {
                'contraction_rate': 0.1,
                'informal_rate': 0.05,
                'hedging_rate': 0.6,
                'filler_rate': 0.1,
                'error_rate': 0.02
            },
            'enthusiastic': {
                'contraction_rate': 0.6,
                'informal_rate': 0.5,
                'hedging_rate': 0.2,
                'filler_rate': 0.3,
                'error_rate': 0.1,
                'intensifier_rate': 0.7
            }
        }
        # Emotional expression templates; {topic} and {sentence} are filled
        # in humanize_text.
        self.emotional_expressions = {
            'positive': [
                "I'm so happy about {topic}!",
                "This is amazing: {sentence}",
                "I love how {sentence}",
                "Wow, {sentence} That's incredible!",
                "I'm really excited about {topic}."
            ],
            'negative': [
                "I'm not too thrilled about {topic}.",
                "Unfortunately, {sentence}",
                "I'm concerned that {sentence}",
                "This is disappointing: {sentence}",
                "I'm a bit worried about {topic}."
            ],
            'neutral': [
                "In my view, {sentence}",
                "I think {sentence}",
                "From what I understand, {sentence}",
                "My take on {topic} is that {sentence}",
                "When it comes to {topic}, {sentence}"
            ]
        }
def humanize_text(self, text, intensity=0.5, personality='casual', add_errors=True, regional_dialect=None, emotional_tone=None):
"""
Enhanced main function to humanize text with multiple parameters for customization.
Args:
text (str): The input text to humanize
intensity (float): Controls how much the text is transformed (0.0 to 1.0)
personality (str): Personality profile to use ('casual', 'formal', 'academic', 'enthusiastic')
add_errors (bool): Whether to add realistic speech/typing errors
regional_dialect (str): Regional dialect to incorporate (None, 'us_south', 'british')
emotional_tone (str): Overall emotional tone (None, 'positive', 'negative', 'neutral')
Returns:
str: Humanized text
"""
if intensity < 0 or intensity > 1:
raise ValueError("Intensity must be between 0.0 and 1.0")
# Apply personality profile
profile = self.personality_profiles.get(personality, self.personality_profiles['casual'])
# Parse the text with spaCy for better linguistic analysis
doc = self.nlp(text)
# Split text into paragraphs
paragraphs = [p.strip() for p in text.split('\n') if p.strip()]
# Apply paragraph-level transformations
transformed_paragraphs = []
for para in paragraphs:
# Analyze sentiment if available
sentiment = self._analyze_sentiment(para) if emotional_tone is None and self.sentiment_analyzer else None
current_tone = emotional_tone or (sentiment['label'].lower() if sentiment else 'neutral')
# Add emotional expressions based on tone
if random.random() < intensity * 0.3 and current_tone in self.emotional_expressions:
# Find a topic in the paragraph
topic = self._extract_topic(para)
expression = random.choice(self.emotional_expressions[current_tone])
para = expression.format(topic=topic, sentence=para.lower() if para[0].isupper() else para)
# Apply paragraph transformations
for transform in self.paragraph_transformations:
if random.random() < intensity * 0.4:
para = transform(para, profile)
# Split paragraph into sentences
sentences = sent_tokenize(para)
# Apply sentence-level transformations
transformed_sentences = self._apply_sentence_transformations(sentences, intensity, profile)
# Apply regional dialect if specified
if regional_dialect and regional_dialect in self.regional_variations:
transformed_para = " ".join(transformed_sentences)
for original, variant in self.regional_variations[regional_dialect].items():
# Use word boundaries to avoid partial replacements
pattern = r'\b' + re.escape(original) + r'\b'
if random.random() < intensity * 0.7:
transformed_para = re.sub(pattern, variant, transformed_para, flags=re.IGNORECASE)
transformed_paragraphs.append(transformed_para)
else:
transformed_paragraphs.append(" ".join(transformed_sentences))
# Join paragraphs
result = "\n\n".join(transformed_paragraphs)
# Apply word-level transformations
result = self._apply_word_transformations(result, intensity, profile)
# Introduce speech errors if enabled
if add_errors and intensity > 0.2:
result = self._introduce_speech_errors(result, intensity * profile.get('error_rate', 0.1))
# Normalize spacing around punctuation
result = self._normalize_spacing(result)
return result
def _analyze_sentiment(self, text):
"""Analyze sentiment of the text using the sentiment analyzer."""
if self.sentiment_analyzer:
try:
return self.sentiment_analyzer(text)[0]
except:
pass
return None
def _extract_topic(self, text):
"""Extract a potential topic from the text using spaCy."""
doc = self.nlp(text)
# Try to find entities
entities = list(doc.ents)
if entities:
return entities[0].text
# Try to find noun chunks
chunks = list(doc.noun_chunks)
if chunks:
return chunks[0].text
# Fallback to first sentence
sentences = sent_tokenize(text)
if sentences:
words = word_tokenize(sentences[0])
if words:
return words[0]
return "this"
def _apply_sentence_transformations(self, sentences, intensity, profile):
"""Apply various sentence-level transformations with personality profile influence."""
result = []
i = 0
while i < len(sentences):
# Randomly decide whether to apply a transformation
if random.random() < intensity * 0.7:
# Weight transformations based on personality
weights = [
1.0, # merge_sentences
0.8, # split_sentences
0.5 if profile.get('hedging_rate', 0.3) > 0.4 else 0.2, # passive_to_active
0.2 if profile.get('hedging_rate', 0.3) > 0.4 else 0.5, # active_to_passive
profile.get('hedging_rate', 0.3), # add_hedging
profile.get('intensifier_rate', 0.4), # add_intensifiers
0.3 if profile.get('informal_rate', 0.5) > 0.5 else 0.1, # add_rhetorical_question
0.4 if profile.get('informal_rate', 0.5) > 0.4 else 0.2, # add_aside_comment
]
# Normalize weights
weights = [w / sum(weights) for w in weights]
# Choose a transformation based on weights
transformation = random.choices(self.sentence_transformations, weights=weights)[0]
# For transformations requiring two sentences
if transformation in [self.merge_sentences] and i < len(sentences) - 1:
transformed = transformation(sentences[i], sentences[i+1])
result.append(transformed)
i += 2
# For transformations requiring one sentence
else:
transformed = transformation(sentences[i], "")
result.append(transformed)
i += 1
else:
result.append(sentences[i])
i += 1
return result
def _apply_word_transformations(self, text, intensity, profile):
"""Apply various word-level transformations with personality profile influence."""
# Parse the text with spaCy for better context
doc = self.nlp(text)
# Apply transformations separately to preserve sentence structure
sentences = [sent.text for sent in doc.sents]
transformed_sentences = []
for sentence in sentences:
# Tokenize sentence
sentence_doc = self.nlp(sentence)
# Build a new sentence from tokens
new_tokens = []
i = 0
while i < len(sentence_doc):
token = sentence_doc[i]
# Skip punctuation for most transformations
if token.is_punct:
new_tokens.append(token.text)
i += 1
continue
# Randomly decide whether to apply a transformation
if random.random() < intensity * 0.5:
# Weight transformations based on personality
weights = [
0.6, # contextual_synonym_replacement
profile.get('contraction_rate', 0.5), # contraction_expansion
profile.get('filler_rate', 0.3), # add_filler_words
profile.get('informal_rate', 0.5), # informal_substitution
0.3 if profile.get('intensifier_rate', 0.4) > 0.5 else 0.1, # add_emphatic_repetition
]
# Normalize weights
weights = [w / sum(weights) for w in weights]
# Choose a transformation based on weights
transformation = random.choices(self.word_transformations, weights=weights)[0]
# Apply transformation
if transformation == self.contextual_synonym_replacement:
transformed = transformation(token, sentence_doc)
elif transformation == self.contraction_expansion:
# Need to check if this is a multi-word expansion
if token.text.lower() in self.expansions:
# This is a potential expansion point
expansion = self.expansions[token.text.lower()]
if ' ' in expansion and i < len(sentence_doc) - 1:
# Check if the next tokens match the expansion
expansion_parts = expansion.split()
if expansion_parts[0].lower() == token.text.lower() and expansion_parts[1].lower() == sentence_doc[i+1].text.lower():
# Apply contraction
transformed = expansion
i += 1 # Skip the next token
else:
transformed = token.text
else:
transformed = token.text
else:
transformed = transformation(token)
elif transformation == self.add_filler_words:
# Add a filler word before the current word
if random.random() < 0.3: # Only occasionally add fillers
filler_category = random.choice(list(self.filler_words.keys()))
filler = random.choice(self.filler_words[filler_category])
new_tokens.append(filler)
transformed = token.text
elif transformation == self.informal_substitution:
transformed = transformation(token)
elif transformation == self.add_emphatic_repetition:
transformed = transformation(token)
else:
transformed = token.text
new_tokens.append(transformed)
else:
new_tokens.append(token.text)
i += 1
# Recreate the sentence from tokens
transformed_sentence = self._reconstruct_sentence(new_tokens)
transformed_sentences.append(transformed_sentence)
# Join the transformed sentences
result = " ".join(transformed_sentences)
return result
def _reconstruct_sentence(self, tokens):
"""Reconstruct a sentence from tokens, preserving proper spacing."""
result = ""
for i, token in enumerate(tokens):
# Handle special cases for punctuation
if token in ".,!?;:)]}" and result:
result = result.rstrip() + token + " "
# Don't add space after opening brackets
elif i > 0 and tokens[i-1] in "([{" and result:
result = result.rstrip() + token + " "
elif token in "([{" and result:
result = result.rstrip() + token
# Handle quotes
elif token in ['\'', '"'] and result and result[-1] != " ":
result += token + " "
else:
result += token + " "
return result.strip()
def _introduce_speech_errors(self, text, error_rate):
"""Introduce realistic speech/typing errors."""
words = text.split()
result = []
for i, word in enumerate(words):
if len(word) > 2 and random.random() < error_rate:
# Select a random error type
error_type = random.choice(list(self.speech_errors.keys()))
# Apply the error
error_func = self.speech_errors[error_type]
if error_type in ['restarts', 'repetitions'] and i > 0:
# These errors need previous context
modified = error_func(words[i-1], word)
result.pop() # Remove the previous word
result.append(modified)
else:
result.append(error_func(word))
else:
result.append(word)
return " ".join(result)
def _normalize_spacing(self, text):
"""Fix spacing around punctuation for a more natural look."""
# Fix spacing around punctuation
text = re.sub(r'\s+([.,;:!?)])', r'\1', text)
text = re.sub(r'([(])\s+', r'\1', text)
# Fix multiple spaces
text = re.sub(r'\s{2,}', ' ', text)
return text
# Speech error generators
def _generate_restart(self, prev_word, word):
"""Generate a restart error (e.g., "I was- I was saying")."""
return f"{prev_word}- {prev_word} {word}"
def _generate_repetition(self, prev_word, word):
"""Generate a word repetition (e.g., "the the cat")."""
return f"{prev_word} {prev_word} {word}"
def _generate_correction(self, word):
"""Generate a self-correction (e.g., "teh the")."""
if len(word) < 4:
return word
# Create a simple typo
i = random.randint(0, len(word) - 2)
typo = word[:i] + word[i+1] + word[i] + word[i+2:]
# Choose correction style
correction_style = random.choice(["asterisk", "dash", "explicit"])
if correction_style == "asterisk":
return f"{typo}*{word}*"
elif correction_style == "dash":
return f"{typo}-{word}"
else:
return f"{typo}, I mean {word}"
def _generate_filled_pause(self, word):
"""Generate a filled pause (e.g., "um, like")."""
filler = random.choice(self.filler_words['hesitation'])
return f"{filler}, {word}"
def _generate_agreement_error(self, word):
"""Generate a subject-verb agreement error (simplified)."""
if word.endswith('s') and len(word) > 3:
return word[:-1]
elif not word.endswith('s') and random.random() < 0.5:
return word + 's'
return word
# Enhanced sentence-level transformations
def merge_sentences(self, sent1, sent2):
"""Merge two sentences with a conjunction."""
# Remove the period from the first sentence
if sent1.endswith('.'):
sent1 = sent1[:-1]
# Choose a conjunction based on the content
doc1 = self.nlp(sent1)
doc2 = self.nlp(sent2)
# Check for content relationship
similarity = doc1.similarity(doc2)
if similarity > 0.7:
# Highly similar, use addition
conjunction = random.choice(["and", "also", "moreover", "furthermore"])
elif similarity < 0.3:
# Dissimilar, use contrast
conjunction = random.choice(["but", "however", "on the other hand", "yet"])
else:
# Moderate similarity, use general conjunction
conjunction = random.choice(["and", "while", "so", "because", "although"])
# Merge the sentences
return f"{sent1} {conjunction} {sent2.lower() if sent2 and sent2[0].isupper() else sent2}"
    def split_sentences(self, sent, _):
        """Split a longer sentence into two with improved linguistic awareness.

        Split points are chosen among conjunctions, prepositions, clause
        markers, punctuation, or new-clause subjects from the dependency
        parse, falling back to the midpoint.  The unused second parameter
        keeps the signature uniform with the other sentence transforms.
        """
        doc = self.nlp(sent)
        tokens = [token for token in doc]
        # Only split if sentence is long enough
        if len(tokens) < 8:
            return sent
        # Find a good split point based on dependency structure
        potential_splits = []
        for i, token in enumerate(tokens):
            # Good split points are often after conjunctions or punctuation;
            # the 3-token margins keep either half from being a tiny fragment.
            if (token.dep_ in ['cc', 'prep', 'mark'] or token.pos_ == 'PUNCT') and 3 < i < len(tokens) - 3:
                potential_splits.append((i, 1))  # Higher weight for these
            # Or before a new clause (a fresh subject)
            elif token.dep_ in ['nsubj', 'nsubjpass'] and i > 3:
                potential_splits.append((i, 0.8))
        if not potential_splits:
            # Fallback to middle
            split_point = len(tokens) // 2
        else:
            # Choose a split point with weighted random selection
            points, weights = zip(*potential_splits)
            split_point = random.choices(points, weights=weights)[0]
        # Create two new sentences; text_with_ws preserves original spacing
        sent1 = "".join([t.text_with_ws for t in tokens[:split_point]])
        sent2 = "".join([t.text_with_ws for t in tokens[split_point:]])
        # Ensure proper capitalization and punctuation
        sent1 = sent1.rstrip()
        if not sent1.endswith(('.', '!', '?')):
            sent1 += '.'
        sent2 = sent2.strip()
        if sent2 and sent2[0].islower():
            sent2 = sent2[0].upper() + sent2[1:]
        return f"{sent1} {sent2}"
    def passive_to_active(self, sent, _):
        """Convert passive voice to active voice using spaCy's dependency parsing.

        Only the first passive construction is handled, and only when an
        explicit "by ..." agent is attached to the verb; otherwise the
        sentence is returned unchanged.
        """
        doc = self.nlp(sent)
        # Look for passive constructions
        for token in doc:
            if token.dep_ == "nsubjpass":
                # Found passive voice
                subject = token
                agent = None
                verb = token.head
                # Find the agent (often introduced by "by")
                for child in doc:
                    if child.dep_ == "agent" and child.head == verb:
                        for grandchild in child.children:
                            if grandchild.dep_ in ["pobj", "nmod"]:
                                agent = grandchild
                                break
                if agent:
                    # Extract the core components (full subtrees via edge indices)
                    subj_span = doc[subject.left_edge.i:subject.right_edge.i+1].text
                    verb_span = doc[verb.i:verb.i+1].text
                    agent_span = doc[agent.left_edge.i:agent.right_edge.i+1].text
                    # Reconstruct in active voice.
                    # NOTE(review): verb_span is a single token, so these
                    # replacements can only fire if an auxiliary got merged
                    # into it — presumably dead code; verify with real parses.
                    active_verb = verb_span.replace("was ", "").replace("were ", "")
                    # Remove trailing period for reconstruction
                    if sent.endswith('.'):
                        new_sent = f"{agent_span} {active_verb} {subj_span}."
                    else:
                        new_sent = f"{agent_span} {active_verb} {subj_span}"
                    return new_sent
        # If no passive construction found or couldn't convert
        return sent
    def active_to_passive(self, sent, _):
        """Convert active voice to passive voice using spaCy's dependency parsing.

        Very simplified: always produces "was <verb>ed", so irregular verbs
        and plural subjects yield ungrammatical output — assumed acceptable
        for this humanizer's informal purposes.
        """
        doc = self.nlp(sent)
        # Look for active voice constructions
        for token in doc:
            if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
                # Found a subject and verb
                subject = token
                verb = token.head
                # Find the direct object
                obj = None
                for child in verb.children:
                    if child.dep_ in ["dobj", "obj"]:
                        obj = child
                        break
                if obj:
                    # Extract the core components (full subtrees via edge indices)
                    subj_span = doc[subject.left_edge.i:subject.right_edge.i+1].text
                    verb_span = doc[verb.i:verb.i+1].text
                    obj_span = doc[obj.left_edge.i:obj.right_edge.i+1].text
                    # Determine the passive verb form: strip third-person -s
                    # before appending -ed
                    passive_verb = verb_span
                    if verb_span.endswith("s"):
                        passive_verb = passive_verb[:-1]
                    # Reconstruct in passive voice
                    # Remove trailing period for reconstruction
                    if sent.endswith('.'):
                        new_sent = f"{obj_span} was {passive_verb}ed by {subj_span}."
                    else:
                        new_sent = f"{obj_span} was {passive_verb}ed by {subj_span}"
                    return new_sent
        # If no active construction found or couldn't convert
        return sent
def add_hedging(self, sent, _):
"""Add hedging language to a statement."""
# Add a hedging phrase at the beginning of the sentence
hedging = random.choice(self.hedging_phrases)
# For questions, add hedging at the end
if sent.endswith('?'):
return f"{sent[:-1]}, {hedging.lower()}?"
# For statements, add at the beginning
if sent[0].isupper():
return f"{hedging}, {sent[0].lower() + sent[1:]}"
return f"{hedging}, {sent}"
def add_intensifiers(self, sent, _):
"""Add intensifiers to adjectives and adverbs."""
doc = self.nlp(sent)
words = list(doc)
result = []
for i, token in enumerate(words):
# Add intensifier before adjectives and adverbs
if token.pos_ in ["ADJ", "ADV"] and random.random() < 0.6:
# Choose an appropriate intensifier
intensifier = random.choice(self.intensifiers)
# Add the intensifier before the adjective/adverb
result.append(intensifier)
# Add the current token
result.append(token.text)
return " ".join(result)
def add_rhetorical_question(self, sent, _):
"""Add a rhetorical question related to the statement."""
# Create a rhetorical question based on the content
doc = self.nlp(sent)
# Extract key information
subjects = [tok for tok in doc if tok.dep_ in ["nsubj", "nsubjpass"]]
if subjects and random.random() < 0.7:
subject = subjects[0].text
# Various question templates
templates = [
f"Isn't that interesting about {subject}?",
f"Don't you think so?",
f"Right?",
f"You know what I mean?",
f"Can you imagine?",
f"Who would have thought?",
f"Why is that so important?"
]
return f"{sent} {random.choice(templates)}"
return sent
def add_aside_comment(self, sent, _):
"""Add a parenthetical aside or comment."""
# Inject an aside comment in the middle or end of the sentence
doc = self.nlp(sent)
words = [token.text for token in doc]
# Choose position for the aside
if len(words) > 5:
position = random.randint(3, len(words) - 2) if len(words) > 5 else len(words)
else:
# If sentence is too short, add at the end
position = len(words)
# Create aside comments
asides = [
"by the way",
"if you ask me",
"I think",
"you know",
"to be honest",
"believe it or not",
"interestingly",
"surprisingly",
"and this is important"
]
aside = random.choice(asides)
# Insert the aside
if position < len(words):
# Insert in the middle, with commas
words.insert(position, f", {aside},")
else:
# Add at the end
if sent.endswith('.'):
words[-1] = words[-1][:-1] # Remove the period
words.append(f", {aside}.")
else:
words.append(f", {aside}")
return " ".join(words)
    # Word-level transformations
    def contextual_synonym_replacement(self, token, doc):
        """Replace a word with a contextually appropriate synonym.

        Candidates come from WordNet; each is vetted by re-parsing the text
        with the substitution and keeping it only when document similarity
        to the original stays above 0.8.

        NOTE(review): ``doc.text.replace(token.text, synonym)`` replaces
        every occurrence (including substrings) of the word, and each
        candidate costs a full spaCy parse — confirm this cost is acceptable
        on hot paths.
        """
        # Only replace content words
        if token.pos_ not in ["NOUN", "VERB", "ADJ", "ADV"] or token.is_stop:
            return token.text
        # Find synonyms using WordNet
        synonyms = []
        for syn in wordnet.synsets(token.text):
            for lemma in syn.lemmas():
                synonym = lemma.name().replace('_', ' ')
                if synonym != token.text and synonym not in synonyms:
                    synonyms.append(synonym)
        # If no synonyms found, return original
        if not synonyms:
            return token.text
        # Filter synonyms that fit the context
        filtered_synonyms = []
        for synonym in synonyms[:5]:  # Limit checking to 5 synonyms for efficiency
            # Create a new document with the synonym
            new_text = doc.text.replace(token.text, synonym)
            new_doc = self.nlp(new_text)
            # Calculate similarity between original and modified text
            similarity = doc.similarity(new_doc)
            if similarity > 0.8:  # High semantic similarity threshold
                filtered_synonyms.append((synonym, similarity))
        # If no good contextual synonyms, return original
        if not filtered_synonyms:
            return token.text
        # Sort by similarity and choose one of the top options
        filtered_synonyms.sort(key=lambda x: x[1], reverse=True)
        return random.choice(filtered_synonyms[:3])[0]
def contraction_expansion(self, token):
"""Toggle between contractions and their expansions."""
if token.text.lower() in self.contractions:
# Expand a contraction
return self.contractions[token.text.lower()]
elif token.text.lower() in self.expansions:
# Contract an expansion
return self.expansions[token.text.lower()]
return token.text
def add_filler_words(self, token):
"""Add filler words appropriate to the context."""
# Determine appropriate filler category based on token properties
filler_category = None
if token.pos_ == "ADJ":
filler_category = random.choice(["emphasis", "hedging"])
elif token.pos_ == "VERB":
filler_category = random.choice(["hesitation", "emphasis"])
elif token.pos_ == "NOUN":
filler_category = random.choice(["clarification", "informal"])
else:
filler_category = random.choice(list(self.filler_words.keys()))
filler = random.choice(self.filler_words[filler_category])
# Add the filler before the token
return f"{filler} {token.text}"
def informal_substitution(self, token):
"""Replace formal words with informal alternatives."""
if token.text.lower() in self.informal_words:
return random.choice(self.informal_words[token.text.lower()])
return token.text
def add_emphatic_repetition(self, token):
"""Add emphatic repetition for emphasis."""
# Only repeat certain word types
if token.pos_ in ["ADJ", "ADV"] and len(token.text) > 2:
# Choose repetition style
style = random.choice(["hyphen", "comma", "simple"])
if style == "hyphen":
return f"{token.text}-{token.text}"
elif style == "comma":
return f"{token.text}, {token.text}"
else:
return f"{token.text} {token.text}"
return token.text
# Paragraph-level transformations
def add_discourse_markers(self, paragraph, profile):
"""Add discourse markers to enhance cohesion."""
sentences = sent_tokenize(paragraph)
if len(sentences) <= 1:
return paragraph
# Determine appropriate markers based on content
marker_types = list(self.discourse_markers.keys())
weighted_types = random.choices(
marker_types,
weights=[0.2, 0.2, 0.2, 0.15, 0.1, 0.1, 0.05],
k=min(len(sentences)-1, 3) # Don't add too many markers
)
# Add markers to random sentences
num_markers = min(len(sentences) - 1, max(1, int(len(sentences) * 0.5)))
positions = sorted(random.sample(range(1, len(sentences)), num_markers))
for i, pos in enumerate(positions):
marker_type = weighted_types[i % len(weighted_types)]
marker = random.choice(self.discourse_markers[marker_type])
# Add the marker at the beginning of the sentence
sentences[pos] = f"{marker}, {sentences[pos][0].lower() + sentences[pos][1:]}"
return " ".join(sentences)
def adjust_formality(self, paragraph, profile):
"""Adjust the overall formality of the paragraph."""
formality_level = profile.get('informal_rate', 0.5)
# For formal text (low informality)
if formality_level < 0.3:
# Replace contractions with expansions
for contraction, expansion in self.contractions.items():
pattern = r'\b' + re.escape(contraction) + r'\b'
paragraph = re.sub(pattern, expansion, paragraph, flags=re.IGNORECASE)
# Remove certain informal phrases
informal_phrases = ["you know", "like", "kinda", "sort of", "pretty much"]
for phrase in informal_phrases:
paragraph = re.sub(r'\b' + re.escape(phrase) + r'\b', '', paragraph, flags=re.IGNORECASE)
# For informal text (high informality)
elif formality_level > 0.7:
# Replace formal words with informal alternatives
for formal, informals in self.informal_words.items():
pattern = r'\b' + re.escape(formal) + r'\b'
if random.random() < 0.7:
replacement = random.choice(informals)
paragraph = re.sub(pattern, replacement, paragraph, flags=re.IGNORECASE)
# Add contractions
for expansion, contraction in self.expansions.items():
if ' ' in expansion: # Only multi-word expansions
pattern = r'\b' + re.escape(expansion) + r'\b'
paragraph = re.sub(pattern, contraction, paragraph, flags=re.IGNORECASE)
return paragraph
    def add_cohesion_devices(self, paragraph, profile):
        """Add cohesion devices like pronouns and references.

        Picks a salient entity (or long noun) and, in the sentence following
        its first mention, replaces a repeat mention with a pronoun or
        prefixes it with a determiner.  ``profile`` is accepted for
        signature uniformity but unused here.

        NOTE(review): the "repetition" reference type can be chosen but has
        no handling branch, so it leaves the paragraph unchanged — confirm
        whether that is intended.
        """
        sentences = sent_tokenize(paragraph)
        if len(sentences) <= 1:
            return paragraph
        # Parse the paragraph
        doc = self.nlp(paragraph)
        # Extract key entities
        entities = {}
        for ent in doc.ents:
            if ent.label_ in ["PERSON", "ORG", "GPE", "LOC", "PRODUCT"]:
                if ent.text not in entities:
                    entities[ent.text] = []
                entities[ent.text].append(ent.label_)
        # Extract key nouns
        nouns = [token.text for token in doc if token.pos_ == "NOUN" and len(token.text) > 3]
        # If no entities or nouns found, return original
        if not entities and not nouns:
            return paragraph
        # Choose an entity or noun to reference (entities take priority)
        reference_subject = None
        if entities:
            reference_subject = random.choice(list(entities.keys()))
        elif nouns:
            reference_subject = random.choice(nouns)
        if not reference_subject:
            return paragraph
        # Choose a reference type
        ref_type = random.choice(["pronoun", "determiner", "repetition"])
        # Apply the reference in a later sentence
        if ref_type == "pronoun":
            # Simple pronoun substitution (could be improved with gender recognition)
            pronouns = ["it", "they", "this", "these", "that", "those"]
            pronoun = random.choice(pronouns)
            # Find a sentence with the reference subject
            for i, sent in enumerate(sentences):
                if reference_subject in sent and i < len(sentences) - 1:
                    # Replace in the next sentence if possible
                    next_sent = sentences[i+1]
                    if reference_subject in next_sent:
                        sentences[i+1] = next_sent.replace(reference_subject, pronoun, 1)
                    break
        elif ref_type == "determiner":
            # Add a determiner phrase
            determiners = ["this", "that", "these", "those", "the", "such a"]
            determiner = random.choice(determiners)
            # Find a sentence with the reference subject
            for i, sent in enumerate(sentences):
                if reference_subject in sent and i < len(sentences) - 1:
                    # Add in the next sentence if possible
                    sentences[i+1] = sentences[i+1].replace(
                        reference_subject,
                        f"{determiner} {reference_subject}",
                        1
                    )
                    break
        return " ".join(sentences)
# Example usage: exercise each personality, a regional dialect, and an
# emotional tone against the same sample paragraph.
if __name__ == "__main__":
    humanizer = EnhancedTextHumanizer()
    sample_text = """
Artificial intelligence has significantly impacted numerous industries.
It has improved efficiency in manufacturing through automation.
The healthcare sector has benefited from better diagnostic tools.
Machine learning algorithms continue to advance and provide new solutions.
Companies invest heavily in AI research and development.
"""
    # Every built-in personality profile.
    for personality in ['casual', 'formal', 'academic', 'enthusiastic']:
        print(f"\n--- {personality.upper()} PERSONALITY ---")
        print(humanizer.humanize_text(sample_text, intensity=0.7, personality=personality))
    # Regional dialect variation.
    print("\n--- REGIONAL DIALECT (US SOUTH) ---")
    print(humanizer.humanize_text(sample_text, intensity=0.7, personality='casual', regional_dialect='us_south'))
    # Forced emotional tone.
    print("\n--- EMOTIONAL TONE (POSITIVE) ---")
    print(humanizer.humanize_text(sample_text, intensity=0.7, personality='enthusiastic', emotional_tone='positive'))