Spaces:

MBilal-72
/

GenerativeEngineOptimization

Runtime error

App Files Files Community

GenerativeEngineOptimization / utils /optimizer.py

MBilal-72

Update utils/optimizer.py

3cf0597 verified 6 months ago

raw

history blame

26.9 kB

	"""
	Content Optimization Module
	Enhances content for better AI/LLM performance and GEO scores
	"""

	import json
	import re
	from typing import Dict, Any, List, Optional
	from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate


	class ContentOptimizer:
	"""Main class for optimizing content for AI search engines"""

	def __init__(self, llm):
	self.llm = llm
	self.setup_prompts()

	def setup_prompts(self):
	"""Initialize optimization prompts"""

	# Main content enhancement prompt
	self.enhancement_prompt = (
	"You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.\n\n"
	"Evaluate the input text based on the following criteria, assigning a score from 1-10 for each:\n"
	"- Clarity: How easily can the content be understood?\n"
	"- Structuredness: How well-organized and coherent is the content?\n"
	"- LLM Answerability: How easily can an LLM extract precise answers from the content?\n\n"
	"Identify the most salient keywords.\n\n"
	"Rewrite the text to improve:\n"
	"- Clarity and precision\n"
	"- Logical structure and flow\n"
	"- Suitability for LLM-based information retrieval\n\n"
	"Present your analysis and optimized text in the following JSON format:\n"
	"```json\n"
	"{{\n"
	" \"scores\": {{\n"
	" \"clarity\": 8.5,\n"
	" \"structuredness\": 7.0,\n"
	" \"answerability\": 9.0\n"
	" }},\n"
	" \"keywords\": [\"example\", \"installation\", \"setup\"],\n,"
	" \"optimized_text\": \"...\"\n,"
	"}}\n"
	"```"
	)

	# SEO-style optimization prompt
	self.seo_style_prompt = (
	"You are an AI-first SEO specialist. Optimize this content for AI search engines and LLM systems. "
	"Focus on:\n"
	"1. Semantic keyword optimization\n"
	"2. Question-answer format enhancement\n"
	"3. Factual accuracy and authority signals\n"
	"4. Conversational readiness\n"
	"5. Citation-worthy structure\n"
	"Provide analysis and optimization in JSON:\n"
	"```json\n"
	"{{\n"
	" \"seo_analysis\": {{\n"
	" \"keyword_density\": \"analysis of current keywords\",\n"
	" \"semantic_gaps\": [\"missing semantic terms\"],\n"
	" \"readability_score\": 8.5,\n"
	" \"authority_signals\": [\"credentials\", \"citations\"]\n"
	" }},\n"
	" \"optimized_content\": {{\n"
	" \"title_suggestions\": [\"optimized title 1\", \"optimized title 2\"],\n"
	" \"meta_description\": \"AI-optimized meta description\",\n"
	" \"enhanced_content\": \"full optimized content...\",\n"
	" \"structured_data_suggestions\": [\"schema markup recommendations\"]\n"
	" }},\n"
	" \"improvement_summary\": {{\n"
	" \"changes_made\": [\"change 1\", \"change 2\"],\n"
	" \"expected_impact\": \"description of expected improvements\"\n"
	" }}\n"
	"}}\n"
	"```"
	)

	# Competitive content analysis prompt
	# self.competitive_analysis_prompt = ("Analyze the following content for AI search optimization gaps in entities, questions, clarity, flow, and semantic links. Return JSON with gaps and actionable recommendations.\nContent: {content}")
	self.competitive_analysis_prompt = (
	"Analyze the following content for AI search optimization gaps in entities, questions, clarity, flow, and semantic links. "
	"Return JSON with gaps and actionable recommendations.\n"
	"Content: {content}\n"
	"Provide competitive analysis in JSON format:\n"
	"{{\n"
	" \"competitive_analysis\": {{\n"
	" \"entity_gaps\": [\"gap1\", \"gap2\"],\n"
	" \"question_coverage\": \"summary of coverage\",\n"
	" \"factual_clarity\": \"assessment\",\n"
	" \"conversational_flow\": \"assessment\",\n"
	" \"semantic_relationships\": [\"relationship1\", \"relationship2\"]\n"
	" }},\n"
	" \"recommendations\": [\"recommendation 1\", \"recommendation 2\"]\n"
	"}}\n"
	)
	self.voice_prompt = (
	"""
	Optimize this content for voice search and conversational AI systems.
	Focus on:
	1. Natural language patterns
	2. Question-based structure
	3. Conversational tone
	4. Clear, direct answers
	5. Featured snippet optimization
	Original content: {content}
	Provide optimization in JSON:
	```json
	{{
	"voice_optimized_content": "conversational version...",
	"question_answer_pairs": [
	{{"question": "What is...", "answer": "Direct answer..."}},
	{{"question": "How does...", "answer": "Step by step..."}}
	],
	"featured_snippet_candidates": ["snippet 1", "snippet 2"],
	"natural_language_improvements": ["improvement 1", "improvement 2"],
	"conversational_score": 8.5
	}}
	```
	"""
	)


	# Dedicated prompt for rewriting/optimizing content
	self.optimization_rewrite_prompt = (
	"You are an expert AI content optimizer. Rewrite the provided text to maximize clarity, logical structure, and suitability for LLM-based search and conversational AI. "
	"Your rewritten version should be more precise, well-organized, and easier for AI systems to extract answers from. "
	"Return your output in the following JSON format:\n"
	"```json\n"
	"{{\n"
	" \"optimized_text\": \"...your rewritten content here...\"\n"
	"}}\n"
	"```"
	)

	def optimize_content(self, content: str, analyze_only: bool = False,
	include_keywords: bool = True, optimization_type: str = "seo") -> Dict[str, Any]:
	"""
	Main content optimization function
	Args:
	content (str): Content to optimize
	analyze_only (bool): If True, only analyze without rewriting
	include_keywords (bool): Whether to include keyword analysis
	optimization_type (str): Type of optimization ("standard", "seo", "competitive")
	Returns:
	Dict: Optimization results with scores and enhanced content
	"""
	try:
	# Choose optimization approach
	if optimization_type == "seo" and not analyze_only:
	return self._seo_style_optimization(content, analyze_only)
	elif optimization_type == "competitive" and not analyze_only:
	return self._competitive_optimization(content)
	else:
	return self._standard_optimization(content, analyze_only, include_keywords)

	except Exception as e:
	return {'error': f"Optimization failed: {str(e)}"}

	def _standard_optimization(self, content: str, analyze_only: bool, include_keywords: bool) -> Dict[str, Any]:
	"""Standard content optimization using enhancement prompt"""
	try:
	# Always assign prompt_text
	if analyze_only is True:
	prompt_text = self.enhancement_prompt
	prompt_text = prompt_text.replace(
	"Rewrite the text to improve:",
	"Analyze the text for potential improvements in:"
	).replace(
	'"optimized_text": "..."',
	'"optimization_suggestions": ["suggestion 1", "suggestion 2"]'
	)
	if not include_keywords:
	prompt_text = prompt_text.replace(
	'"keywords": ["example", "installation", "setup"],',
	''
	)
	else:
	# Use dedicated rewrite prompt for optimization
	prompt_text = self.optimization_rewrite_prompt

	prompt_template = ChatPromptTemplate.from_messages([
	SystemMessagePromptTemplate.from_template(prompt_text),
	HumanMessagePromptTemplate.from_template(content[:6000])
	])

	chain = prompt_template \| self.llm
	result = chain.invoke({})

	result_content = result.content if hasattr(result, 'content') else str(result)
	parsed_result = self._parse_optimization_result(result_content)

	parsed_result.update({
	'optimization_type': 'standard',
	'analyze_only': analyze_only,
	'original_length': len(content),
	'original_word_count': len(content.split())
	})

	return parsed_result

	except Exception as e:
	return {'error': f"Standard optimization failed: {str(e)}"}
	def _seo_style_optimization(self, content: str, analyze_only: bool) -> Dict[str, Any]:
	"""SEO-focused optimization for AI search engines"""
	try:
	prompt_template = ChatPromptTemplate.from_messages([
	SystemMessagePromptTemplate.from_template(self.seo_style_prompt),
	HumanMessagePromptTemplate.from_template(f"Optimize this content for AI search engines:\n\n{content[:6000]}")
	])


	chain = prompt_template \| self.llm
	result = chain.invoke({})

	result_content = result.content if hasattr(result, 'content') else str(result)
	parsed_result = self._parse_optimization_result(result_content)

	# Add SEO-specific metadata
	parsed_result.update({
	'optimization_type': 'seo',
	'analyze_only': analyze_only,
	'seo_focused': True
	})

	return parsed_result

	except Exception as e:
	return {'error': f"SEO optimization failed: {str(e)}"}

	def _competitive_optimization(self, content: str) -> Dict[str, Any]:
	"""Competitive analysis-based optimization"""
	try:
	formatted_prompt = self.competitive_analysis_prompt.format(content=content[:5000])

	prompt_template = ChatPromptTemplate.from_messages([
	SystemMessagePromptTemplate.from_template(formatted_prompt),
	HumanMessagePromptTemplate.from_template("Perform the competitive analysis and provide optimization recommendations.")
	])
	# ("system", formatted_prompt),
	# ("user", "Perform the competitive analysis and provide optimization recommendations.")

	chain = prompt_template \| self.llm
	result = chain.invoke({})

	result_content = result.content if hasattr(result, 'content') else str(result)
	parsed_result = self._parse_optimization_result(result_content)

	parsed_result.update({
	'optimization_type': 'competitive',
	'competitive_analysis': True
	})

	return parsed_result

	except Exception as e:
	return {'error': f"Competitive optimization failed: {str(e)}"}

	# def batch_optimize_content(self, content_list: List[str], optimization_type: str = "standard") -> List[Dict[str, Any]]:
	# """
	# Optimize multiple pieces of content in batch

	# Args:
	# content_list (List[str]): List of content pieces to optimize
	# optimization_type (str): Type of optimization to apply

	# Returns:
	# List[Dict]: List of optimization results
	# """
	# results = []

	# for i, content in enumerate(content_list):
	# try:
	# result = self.optimize_content(
	# content,
	# optimization_type=optimization_type
	# )
	# result['batch_index'] = i
	# results.append(result)

	# except Exception as e:
	# results.append({
	# 'batch_index': i,
	# 'error': f"Batch optimization failed: {str(e)}"
	# })

	# return results

	# def generate_content_variations(self, content: str, num_variations: int = 3) -> List[Dict[str, Any]]:
	# """
	# Generate multiple optimized variations of the same content

	# Args:
	# content (str): Original content
	# num_variations (int): Number of variations to generate

	# Returns:
	# List[Dict]: List of content variations with analysis
	# """
	# variations = []

	# variation_prompts = [
	# "Create a more conversational version optimized for AI chat responses",
	# "Create a more authoritative version optimized for citations",
	# "Create a more structured version optimized for question-answering"
	# ]

	# for i in range(min(num_variations, len(variation_prompts))):
	# try:
	# custom_prompt = f"""You are optimizing content for AI systems. {variation_prompts[i]}.

	# Original content: {content[:4000]}

	# Provide the optimized variation in JSON format:
	# ```json
	# {{
	# "variation_type": "conversational/authoritative/structured",
	# "optimized_content": "the rewritten content...",
	# "key_changes": ["change 1", "change 2"],
	# "target_use_case": "description of ideal use case"
	# }}
	# ```
	# """

	# prompt_template = ChatPromptTemplate.from_messages([
	# SystemMessagePromptTemplate.from_template(custom_prompt),
	# HumanMessagePromptTemplate.from_template("Generate the variation.")
	# ])
	# # ("system", custom_prompt),
	# # ("user", "Generate the variation.")

	# chain = prompt_template \| self.llm
	# result = chain.invoke({})

	# result_content = result.content if hasattr(result, 'content') else str(result)
	# parsed_result = self._parse_optimization_result(result_content)

	# parsed_result.update({
	# 'variation_index': i,
	# 'variation_prompt': variation_prompts[i]
	# })

	# variations.append(parsed_result)

	# except Exception as e:
	# variations.append({
	# 'variation_index': i,
	# 'error': f"Variation generation failed: {str(e)}"
	# })

	# return variations

	def analyze_content_readability(self, content: str) -> Dict[str, Any]:
	"""
	Analyze content readability for AI systems

	Args:
	content (str): Content to analyze

	Returns:
	Dict: Readability analysis results
	"""
	try:
	# Basic readability metrics
	words = content.split()
	sentences = re.split(r'[.!?]+', content)
	sentences = [s.strip() for s in sentences if s.strip()]

	paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]

	# Calculate metrics
	avg_words_per_sentence = len(words) / len(sentences) if sentences else 0
	avg_sentences_per_paragraph = len(sentences) / len(paragraphs) if paragraphs else 0

	# Character-based metrics
	avg_word_length = sum(len(word) for word in words) / len(words) if words else 0

	# Complexity indicators
	long_sentences = [s for s in sentences if len(s.split()) > 20]
	complex_words = [w for w in words if len(w) > 6]

	return {
	'basic_metrics': {
	'total_words': len(words),
	'total_sentences': len(sentences),
	'total_paragraphs': len(paragraphs),
	'avg_words_per_sentence': avg_words_per_sentence,
	'avg_sentences_per_paragraph': avg_sentences_per_paragraph,
	'avg_word_length': avg_word_length
	},
	'complexity_indicators': {
	'long_sentences_count': len(long_sentences),
	'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
	'complex_words_count': len(complex_words),
	'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
	},
	'ai_readability_score': self._calculate_ai_readability_score({
	'avg_words_per_sentence': avg_words_per_sentence,
	'avg_word_length': avg_word_length,
	'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
	}),
	'recommendations': self._generate_readability_recommendations({
	'avg_words_per_sentence': avg_words_per_sentence,
	'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
	'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
	})
	}

	except Exception as e:
	return {'error': f"Readability analysis failed: {str(e)}"}

	# def extract_key_entities(self, content: str) -> Dict[str, Any]:
	# """
	# Extract key entities and topics for optimization

	# Args:
	# content (str): Content to analyze

	# Returns:
	# Dict: Extracted entities and topics
	# """
	# try:
	# entity_prompt = """Extract key entities, topics, and concepts from this content for AI optimization.

	# Content: {content}

	# Identify:
	# 1. Named entities (people, places, organizations)
	# 2. Key concepts and topics
	# 3. Technical terms and jargon
	# 4. Potential semantic keywords
	# 5. Question-answer opportunities

	# Format as JSON:
	# ```json
	# {{
	# "named_entities": ["entity1", "entity2"],
	# "key_topics": ["topic1", "topic2"],
	# "technical_terms": ["term1", "term2"],
	# "semantic_keywords": ["keyword1", "keyword2"],
	# "question_opportunities": ["What is...", "How does..."],
	# "entity_relationships": ["relationship descriptions"]
	# }}
	# ```
	# """

	# prompt_template = ChatPromptTemplate.from_messages([
	# SystemMessagePromptTemplate.from_template(entity_prompt.format(content=content[:5000])),
	# HumanMessagePromptTemplate.from_template("Extract the entities and topics.")
	# ])
	# # ("system", entity_prompt.format(content=content[:5000])),
	# # ("user", "Extract the entities and topics.")

	# chain = prompt_template \| self.llm
	# result = chain.invoke({})

	# result_content = result.content if hasattr(result, 'content') else str(result)
	# return self._parse_optimization_result(result_content)

	# except Exception as e:
	# return {'error': f"Entity extraction failed: {str(e)}"}

	def optimize_for_voice_search(self, content: str) -> Dict[str, Any]:
	"""
	Optimize content specifically for voice search and conversational AI

	Args:
	content (str): Content to optimize

	Returns:
	Dict: Voice search optimization results
	"""
	try:
	# self.voice_prompt = ("Optimize the following content for voice search and conversational AI by improving natural language flow, question-based structure, tone, and featured snippet potential. Return JSON with improved content, Q&A pairs, snippet candidates, and a conversational score.\nContent: {content}")


	prompt_template = ChatPromptTemplate.from_messages([
	SystemMessagePromptTemplate.from_template(voice_prompt.format(content=content[:4000])),
	HumanMessagePromptTemplate.from_template("Optimize for voice search.")
	])
	# ("system", voice_prompt.format(content=content[:4000])),
	# ("user", "Optimize for voice search.")

	chain = prompt_template \| self.llm
	result = chain.invoke({})

	result_content = result.content if hasattr(result, 'content') else str(result)
	parsed_result = self._parse_optimization_result(result_content)

	parsed_result.update({
	'optimization_type': 'voice_search',
	'voice_optimized': True
	})

	return parsed_result

	except Exception as e:
	return {'error': f"Voice search optimization failed: {str(e)}"}

	def _parse_optimization_result(self, response_text: str) -> Dict[str, Any]:
	"""Parse LLM response and extract structured results"""
	try:
	# Find JSON content in the response
	json_start = response_text.find('{')
	json_end = response_text.rfind('}') + 1

	if json_start != -1 and json_end != -1:
	json_str = response_text[json_start:json_end]
	parsed = json.loads(json_str)

	# Ensure consistent structure
	if 'scores' not in parsed and 'score' in parsed:
	parsed['scores'] = parsed['score']

	return parsed
	else:
	# If no JSON found, return raw response with error flag
	return {
	'raw_response': response_text,
	'parsing_error': 'No JSON structure found in response',
	'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
	}

	except json.JSONDecodeError as e:
	return {
	'raw_response': response_text,
	'parsing_error': f'JSON decode error: {str(e)}',
	'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
	}
	except Exception as e:
	return {
	'raw_response': response_text,
	'parsing_error': f'Unexpected parsing error: {str(e)}',
	'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
	}

	def _calculate_ai_readability_score(self, metrics: Dict[str, float]) -> float:
	"""Calculate AI-specific readability score"""
	try:
	# Optimal ranges for AI consumption
	optimal_words_per_sentence = 15 # Sweet spot for AI processing
	optimal_word_length = 5 # Balance of complexity and clarity
	optimal_complex_words_percentage = 15 # Some complexity is good for authority

	# Calculate deviations from optimal
	sentence_score = max(0, 10 - abs(metrics['avg_words_per_sentence'] - optimal_words_per_sentence) * 0.5)
	word_length_score = max(0, 10 - abs(metrics['avg_word_length'] - optimal_word_length) * 2)
	complexity_score = max(0, 10 - abs(metrics['complex_words_percentage'] - optimal_complex_words_percentage) * 0.3)

	# Weighted average
	overall_score = (sentence_score * 0.4 + word_length_score * 0.3 + complexity_score * 0.3)

	return round(overall_score, 1)

	except Exception:
	return 5.0 # Default neutral score

	def _generate_readability_recommendations(self, metrics: Dict[str, float]) -> List[str]:
	"""Generate specific readability improvement recommendations"""
	recommendations = []

	try:
	if metrics['avg_words_per_sentence'] > 20:
	recommendations.append("Break down long sentences for better AI processing")
	elif metrics['avg_words_per_sentence'] < 8:
	recommendations.append("Consider combining very short sentences for better context")

	if metrics['long_sentences_percentage'] > 30:
	recommendations.append("Reduce the number of complex sentences (>20 words)")

	if metrics['complex_words_percentage'] > 25:
	recommendations.append("Simplify vocabulary where possible for broader accessibility")
	elif metrics['complex_words_percentage'] < 5:
	recommendations.append("Add more specific terminology to establish authority")

	return recommendations

	except Exception:
	return ["Unable to generate specific recommendations"]