""" Content Optimization Module Enhances content for better AI/LLM performance and GEO scores """ import json import re from typing import Dict, Any, List, Optional from langchain.prompts import ChatPromptTemplate class ContentOptimizer: """Main class for optimizing content for AI search engines""" def __init__(self, llm): self.llm = llm self.setup_prompts() def setup_prompts(self): """Initialize optimization prompts""" # Main content enhancement prompt self.enhancement_prompt = """You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems. Evaluate the input text based on the following criteria, assigning a score from 1–10 for each: Clarity: How easily can the content be understood? Structuredness: How well-organized and coherent is the content? LLM Answerability: How easily can an LLM extract precise answers from the content? Identify the most salient keywords. Rewrite the text to improve: - Clarity and precision - Logical structure and flow - Suitability for LLM-based information retrieval Present your analysis and optimized text in the following JSON format: ```json { "scores": { "clarity": 8.5, "structuredness": 7.0, "answerability": 9.0 }, "keywords": ["example", "installation", "setup"], "optimized_text": "..." } ```""" # SEO-style optimization prompt self.seo_style_prompt = """You are an AI-first SEO specialist. Optimize this content for AI search engines and LLM systems. Focus on: 1. Semantic keyword optimization 2. Question-answer format enhancement 3. Factual accuracy and authority signals 4. Conversational readiness 5. Citation-worthy structure Provide analysis and optimization in JSON: ```json { "seo_analysis": { "keyword_density": "analysis of current keywords", "semantic_gaps": ["missing semantic terms"], "readability_score": 8.5, "authority_signals": ["credentials", "citations"] }, "optimized_content": { "title_suggestions": ["optimized title 1", "optimized title 2"], "meta_description": "AI-optimized meta description", "enhanced_content": "full optimized content...", "structured_data_suggestions": ["schema markup recommendations"] }, "improvement_summary": { "changes_made": ["change 1", "change 2"], "expected_impact": "description of expected improvements" } } ```""" # Competitive content analysis prompt self.competitive_analysis_prompt = """Compare this content against best practices for AI search optimization. Identify gaps and opportunities. 

Original Content:
{content}

Analyze against these AI search factors:
- Entity recognition and linking
- Question coverage completeness
- Factual statement clarity
- Conversational flow
- Semantic relationship mapping

Provide competitive analysis in JSON format with specific recommendations."""

    def optimize_content(self, content: str, analyze_only: bool = False,
                         include_keywords: bool = True,
                         optimization_type: str = "standard") -> Dict[str, Any]:
        """
        Main content optimization function

        Args:
            content (str): Content to optimize
            analyze_only (bool): If True, only analyze without rewriting
            include_keywords (bool): Whether to include keyword analysis
            optimization_type (str): Type of optimization ("standard", "seo", "competitive")

        Returns:
            Dict: Optimization results with scores and enhanced content
        """
        try:
            # Choose optimization approach
            if optimization_type == "seo":
                return self._seo_style_optimization(content, analyze_only)
            elif optimization_type == "competitive":
                return self._competitive_optimization(content)
            else:
                return self._standard_optimization(content, analyze_only, include_keywords)
        except Exception as e:
            return {'error': f"Optimization failed: {str(e)}"}

    def _standard_optimization(self, content: str, analyze_only: bool,
                               include_keywords: bool) -> Dict[str, Any]:
        """Standard content optimization using enhancement prompt"""
        try:
            # Modify prompt based on options
            prompt_text = self.enhancement_prompt
            if analyze_only:
                prompt_text = prompt_text.replace(
                    "Rewrite the text to improve:",
                    "Analyze the text for potential improvements in:"
                ).replace(
                    '"optimized_text": "..."',
                    '"optimization_suggestions": ["suggestion 1", "suggestion 2"]'
                )
            if not include_keywords:
                prompt_text = prompt_text.replace(
                    '"keywords": ["example", "installation", "setup"],',
                    ''
                )

            # Create and run chain. The content is passed as a template variable so
            # that braces inside the user's text are not parsed as placeholders.
            prompt_template = ChatPromptTemplate.from_messages([
                ("system", prompt_text),
                ("user", "{content}")
            ])
            chain = prompt_template | self.llm
            result = chain.invoke({"content": content[:6000]})  # Limit content length

            # Parse result
            result_content = result.content if hasattr(result, 'content') else str(result)
            parsed_result = self._parse_optimization_result(result_content)

            # Add metadata
            parsed_result.update({
                'optimization_type': 'standard',
                'analyze_only': analyze_only,
                'original_length': len(content),
                'original_word_count': len(content.split())
            })
            return parsed_result
        except Exception as e:
            return {'error': f"Standard optimization failed: {str(e)}"}

    def _seo_style_optimization(self, content: str, analyze_only: bool) -> Dict[str, Any]:
        """SEO-focused optimization for AI search engines"""
        try:
            prompt_template = ChatPromptTemplate.from_messages([
                ("system", self.seo_style_prompt),
                ("user", "Optimize this content for AI search engines:\n\n{content}")
            ])
            chain = prompt_template | self.llm
            result = chain.invoke({"content": content[:6000]})

            result_content = result.content if hasattr(result, 'content') else str(result)
            parsed_result = self._parse_optimization_result(result_content)

            # Add SEO-specific metadata
            parsed_result.update({
                'optimization_type': 'seo',
                'analyze_only': analyze_only,
                'seo_focused': True
            })
            return parsed_result
        except Exception as e:
            return {'error': f"SEO optimization failed: {str(e)}"}

    def _competitive_optimization(self, content: str) -> Dict[str, Any]:
        """Competitive analysis-based optimization"""
        try:
            # The {content} placeholder in the prompt is filled at invoke time
            prompt_template = ChatPromptTemplate.from_messages([
                ("system", self.competitive_analysis_prompt),
                ("user", "Perform the competitive analysis and provide optimization recommendations.")
            ])
recommendations.") ]) chain = prompt_template | self.llm result = chain.invoke({}) result_content = result.content if hasattr(result, 'content') else str(result) parsed_result = self._parse_optimization_result(result_content) parsed_result.update({ 'optimization_type': 'competitive', 'competitive_analysis': True }) return parsed_result except Exception as e: return {'error': f"Competitive optimization failed: {str(e)}"} def batch_optimize_content(self, content_list: List[str], optimization_type: str = "standard") -> List[Dict[str, Any]]: """ Optimize multiple pieces of content in batch Args: content_list (List[str]): List of content pieces to optimize optimization_type (str): Type of optimization to apply Returns: List[Dict]: List of optimization results """ results = [] for i, content in enumerate(content_list): try: result = self.optimize_content( content, optimization_type=optimization_type ) result['batch_index'] = i results.append(result) except Exception as e: results.append({ 'batch_index': i, 'error': f"Batch optimization failed: {str(e)}" }) return results def generate_content_variations(self, content: str, num_variations: int = 3) -> List[Dict[str, Any]]: """ Generate multiple optimized variations of the same content Args: content (str): Original content num_variations (int): Number of variations to generate Returns: List[Dict]: List of content variations with analysis """ variations = [] variation_prompts = [ "Create a more conversational version optimized for AI chat responses", "Create a more authoritative version optimized for citations", "Create a more structured version optimized for question-answering" ] for i in range(min(num_variations, len(variation_prompts))): try: custom_prompt = f"""You are optimizing content for AI systems. {variation_prompts[i]}. 

Original content:
{content}

Provide the optimized variation in JSON format:
```json
{{
  "variation_type": "conversational/authoritative/structured",
  "optimized_content": "the rewritten content...",
  "key_changes": ["change 1", "change 2"],
  "target_use_case": "description of ideal use case"
}}
```"""

                prompt_template = ChatPromptTemplate.from_messages([
                    ("system", custom_prompt),
                    ("user", "Generate the variation.")
                ])
                chain = prompt_template | self.llm
                result = chain.invoke({
                    "variation_instruction": variation_prompts[i],
                    "content": content[:4000]
                })

                result_content = result.content if hasattr(result, 'content') else str(result)
                parsed_result = self._parse_optimization_result(result_content)
                parsed_result.update({
                    'variation_index': i,
                    'variation_prompt': variation_prompts[i]
                })
                variations.append(parsed_result)
            except Exception as e:
                variations.append({
                    'variation_index': i,
                    'error': f"Variation generation failed: {str(e)}"
                })
        return variations

    def analyze_content_readability(self, content: str) -> Dict[str, Any]:
        """
        Analyze content readability for AI systems

        Args:
            content (str): Content to analyze

        Returns:
            Dict: Readability analysis results
        """
        try:
            # Basic readability metrics
            words = content.split()
            sentences = re.split(r'[.!?]+', content)
            sentences = [s.strip() for s in sentences if s.strip()]
            paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]

            # Calculate metrics
            avg_words_per_sentence = len(words) / len(sentences) if sentences else 0
            avg_sentences_per_paragraph = len(sentences) / len(paragraphs) if paragraphs else 0

            # Character-based metrics
            avg_word_length = sum(len(word) for word in words) / len(words) if words else 0

            # Complexity indicators
            long_sentences = [s for s in sentences if len(s.split()) > 20]
            complex_words = [w for w in words if len(w) > 6]

            return {
                'basic_metrics': {
                    'total_words': len(words),
                    'total_sentences': len(sentences),
                    'total_paragraphs': len(paragraphs),
                    'avg_words_per_sentence': avg_words_per_sentence,
                    'avg_sentences_per_paragraph': avg_sentences_per_paragraph,
                    'avg_word_length': avg_word_length
                },
                'complexity_indicators': {
                    'long_sentences_count': len(long_sentences),
                    'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
                    'complex_words_count': len(complex_words),
                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
                },
                'ai_readability_score': self._calculate_ai_readability_score({
                    'avg_words_per_sentence': avg_words_per_sentence,
                    'avg_word_length': avg_word_length,
                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
                }),
                'recommendations': self._generate_readability_recommendations({
                    'avg_words_per_sentence': avg_words_per_sentence,
                    'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
                })
            }
        except Exception as e:
            return {'error': f"Readability analysis failed: {str(e)}"}

    def extract_key_entities(self, content: str) -> Dict[str, Any]:
        """
        Extract key entities and topics for optimization

        Args:
            content (str): Content to analyze

        Returns:
            Dict: Extracted entities and topics
        """
        try:
            entity_prompt = """Extract key entities, topics, and concepts from this content for AI optimization.

Content:
{content}

Identify:
1. Named entities (people, places, organizations)
2. Key concepts and topics
3. Technical terms and jargon
4. Potential semantic keywords
5. Question-answer opportunities

Format as JSON:
```json
{{
  "named_entities": ["entity1", "entity2"],
  "key_topics": ["topic1", "topic2"],
  "technical_terms": ["term1", "term2"],
  "semantic_keywords": ["keyword1", "keyword2"],
  "question_opportunities": ["What is...", "How does..."],
  "entity_relationships": ["relationship descriptions"]
}}
```"""

            # Content is passed as a template variable so that braces in the
            # content or the JSON example are not re-parsed as placeholders
            prompt_template = ChatPromptTemplate.from_messages([
                ("system", entity_prompt),
                ("user", "Extract the entities and topics.")
            ])
            chain = prompt_template | self.llm
            result = chain.invoke({"content": content[:5000]})

            result_content = result.content if hasattr(result, 'content') else str(result)
            return self._parse_optimization_result(result_content)
        except Exception as e:
            return {'error': f"Entity extraction failed: {str(e)}"}

    def optimize_for_voice_search(self, content: str) -> Dict[str, Any]:
        """
        Optimize content specifically for voice search and conversational AI

        Args:
            content (str): Content to optimize

        Returns:
            Dict: Voice search optimization results
        """
        try:
            voice_prompt = """Optimize this content for voice search and conversational AI systems.

Focus on:
1. Natural language patterns
2. Question-based structure
3. Conversational tone
4. Clear, direct answers
5. Featured snippet optimization

Original content:
{content}

Provide optimization in JSON:
```json
{{
  "voice_optimized_content": "conversational version...",
  "question_answer_pairs": [
    {{"question": "What is...", "answer": "Direct answer..."}},
    {{"question": "How does...", "answer": "Step by step..."}}
  ],
  "featured_snippet_candidates": ["snippet 1", "snippet 2"],
  "natural_language_improvements": ["improvement 1", "improvement 2"],
  "conversational_score": 8.5
}}
```"""

            prompt_template = ChatPromptTemplate.from_messages([
                ("system", voice_prompt),
                ("user", "Optimize for voice search.")
            ])
            chain = prompt_template | self.llm
            result = chain.invoke({"content": content[:4000]})

            result_content = result.content if hasattr(result, 'content') else str(result)
            parsed_result = self._parse_optimization_result(result_content)
            parsed_result.update({
                'optimization_type': 'voice_search',
                'voice_optimized': True
            })
            return parsed_result
        except Exception as e:
            return {'error': f"Voice search optimization failed: {str(e)}"}

    def _parse_optimization_result(self, response_text: str) -> Dict[str, Any]:
        """Parse LLM response and extract structured results"""
        try:
            # Find JSON content in the response
            json_start = response_text.find('{')
            json_end = response_text.rfind('}') + 1

            if json_start != -1 and json_end > json_start:
                json_str = response_text[json_start:json_end]
                parsed = json.loads(json_str)

                # Ensure consistent structure
                if 'scores' not in parsed and 'score' in parsed:
                    parsed['scores'] = parsed['score']
                return parsed
            else:
                # If no JSON found, return raw response with error flag
                return {
                    'raw_response': response_text,
                    'parsing_error': 'No JSON structure found in response',
                    'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
                }
        except json.JSONDecodeError as e:
            return {
                'raw_response': response_text,
                'parsing_error': f'JSON decode error: {str(e)}',
                'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
            }
        except Exception as e:
            return {
                'raw_response': response_text,
                'parsing_error': f'Unexpected parsing error: {str(e)}',
                'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
            }

    def _calculate_ai_readability_score(self, metrics: Dict[str, float]) -> float:
        """Calculate AI-specific readability score"""
        try:
            # Optimal ranges for AI consumption
            optimal_words_per_sentence = 15  # Sweet spot for AI processing
            optimal_word_length = 5  # Balance of complexity and clarity
            optimal_complex_words_percentage = 15  # Some complexity is good for authority

            # Calculate deviations from optimal
            sentence_score = max(0, 10 - abs(metrics['avg_words_per_sentence'] - optimal_words_per_sentence) * 0.5)
            word_length_score = max(0, 10 - abs(metrics['avg_word_length'] - optimal_word_length) * 2)
            complexity_score = max(0, 10 - abs(metrics['complex_words_percentage'] - optimal_complex_words_percentage) * 0.3)

            # Weighted average
            overall_score = (sentence_score * 0.4 + word_length_score * 0.3 + complexity_score * 0.3)
            return round(overall_score, 1)
        except Exception:
            return 5.0  # Default neutral score

    def _generate_readability_recommendations(self, metrics: Dict[str, float]) -> List[str]:
        """Generate specific readability improvement recommendations"""
        recommendations = []
        try:
            if metrics['avg_words_per_sentence'] > 20:
                recommendations.append("Break down long sentences for better AI processing")
            elif metrics['avg_words_per_sentence'] < 8:
                recommendations.append("Consider combining very short sentences for better context")

            if metrics['long_sentences_percentage'] > 30:
                recommendations.append("Reduce the number of complex sentences (>20 words)")

            if metrics['complex_words_percentage'] > 25:
                recommendations.append("Simplify vocabulary where possible for broader accessibility")
            elif metrics['complex_words_percentage'] < 5:
                recommendations.append("Add more specific terminology to establish authority")

            return recommendations
        except Exception:
            return ["Unable to generate specific recommendations"]
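

# Example usage: a minimal sketch of wiring ContentOptimizer to a LangChain chat
# model. The backend choice below (langchain_openai.ChatOpenAI, model "gpt-4o-mini",
# OPENAI_API_KEY in the environment) is an assumption for illustration; any chat
# model that supports the `prompt | llm` pipe should work the same way.
if __name__ == "__main__":
    from langchain_openai import ChatOpenAI  # assumed backend, swap for your own

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    optimizer = ContentOptimizer(llm)

    sample = (
        "Our installation guide covers setup on Windows, macOS, and Linux. "
        "Download the installer, run it, and follow the on-screen prompts."
    )

    # LLM-free readability metrics
    readability = optimizer.analyze_content_readability(sample)
    print("AI readability score:", readability.get('ai_readability_score'))

    # Standard optimization with scores, keywords, and rewritten text
    result = optimizer.optimize_content(sample, optimization_type="standard")
    print("Scores:", result.get('scores'))
    print("Optimized text (truncated):", result.get('optimized_text', '')[:200])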