# NOTE(review): the three lines below were scrape artifacts from the hosting
# page ("Spaces" header plus duplicated "Runtime error" banners), not Python;
# they are preserved here as comments so the module parses.
# Spaces:
# Runtime error
# Runtime error
| """ | |
| Content Optimization Module | |
| Enhances content for better AI/LLM performance and GEO scores | |
| """ | |
| import json | |
| import re | |
| from typing import Dict, Any, List, Optional | |
| from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate | |
class ContentOptimizer:
    """Main class for optimizing content for AI search engines.

    Wraps a chat model (``llm``) plus a set of prompt strings (built in
    :meth:`setup_prompts`) that the various ``optimize_*`` entry points use.
    """

    def __init__(self, llm):
        """Store the chat model and build the prompt templates.

        Args:
            llm: Chat model composed into chains via ``prompt | llm``
                (assumes a LangChain-runnable model — TODO confirm with callers).
        """
        self.llm = llm
        self.setup_prompts()
    def setup_prompts(self):
        """Initialize optimization prompts.

        Builds three prompt strings used by the optimization methods.

        Notes:
            - The doubled braces (``{{`` / ``}}``) in the JSON examples are
              escapes: one templating pass (LangChain or ``str.format``)
              renders them as single literal braces.
            - ``competitive_analysis_prompt`` also contains a ``{content}``
              placeholder to be filled with the text under analysis.
        """
        # Main content enhancement prompt (used by _standard_optimization,
        # which rewrites parts of it for analyze-only / no-keyword runs).
        self.enhancement_prompt = (
            "You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.\n\n"
            "Evaluate the input text based on the following criteria, assigning a score from 1-10 for each:\n"
            "- Clarity: How easily can the content be understood?\n"
            "- Structuredness: How well-organized and coherent is the content?\n"
            "- LLM Answerability: How easily can an LLM extract precise answers from the content?\n\n"
            "Identify the most salient keywords.\n\n"
            "Rewrite the text to improve:\n"
            "- Clarity and precision\n"
            "- Logical structure and flow\n"
            "- Suitability for LLM-based information retrieval\n\n"
            "Present your analysis and optimized text in the following JSON format:\n"
            "```json\n"
            "{{\n"
            " \"scores\": {{\n"
            " \"clarity\": 8.5,\n"
            " \"structuredness\": 7.0,\n"
            " \"answerability\": 9.0\n"
            " }},\n"
            " \"keywords\": [\"example\", \"installation\", \"setup\"],\n"
            " \"optimized_text\": \"...\"\n"
            "}}\n"
            "```"
        )
        # SEO-style optimization prompt (used by _seo_style_optimization).
        self.seo_style_prompt = (
            "You are an AI-first SEO specialist. Optimize this content for AI search engines and LLM systems. "
            "Focus on:\n"
            "1. Semantic keyword optimization\n"
            "2. Question-answer format enhancement\n"
            "3. Factual accuracy and authority signals\n"
            "4. Conversational readiness\n"
            "5. Citation-worthy structure\n"
            "Provide analysis and optimization in JSON:\n"
            "```json\n"
            "{{\n"
            " \"seo_analysis\": {{\n"
            " \"keyword_density\": \"analysis of current keywords\",\n"
            " \"semantic_gaps\": [\"missing semantic terms\"],\n"
            " \"readability_score\": 8.5,\n"
            " \"authority_signals\": [\"credentials\", \"citations\"]\n"
            " }},\n"
            " \"optimized_content\": {{\n"
            " \"title_suggestions\": [\"optimized title 1\", \"optimized title 2\"],\n"
            " \"meta_description\": \"AI-optimized meta description\",\n"
            " \"enhanced_content\": \"full optimized content...\",\n"
            " \"structured_data_suggestions\": [\"schema markup recommendations\"]\n"
            " }},\n"
            " \"improvement_summary\": {{\n"
            " \"changes_made\": [\"change 1\", \"change 2\"],\n"
            " \"expected_impact\": \"description of expected improvements\"\n"
            " }}\n"
            "}}\n"
            "```"
        )
        # Competitive content analysis prompt (used by _competitive_optimization);
        # {content} is a placeholder for the text under analysis.
        self.competitive_analysis_prompt = (
            "Compare this content against best practices for AI search optimization. Identify gaps and opportunities.\n"
            "Original Content: {content}\n"
            "Analyze against these AI search factors:\n"
            "- Entity recognition and linking\n"
            "- Question coverage completeness\n"
            "- Factual statement clarity\n"
            "- Conversational flow\n"
            "- Semantic relationship mapping\n\n"
            "Provide competitive analysis in JSON format with specific recommendations:\n"
            "{{\n"
            " \"competitive_analysis\": {{\n"
            " \"entity_gaps\": [\"gap1\", \"gap2\"],\n"
            " \"question_coverage\": \"summary of coverage\",\n"
            " \"factual_clarity\": \"assessment\",\n"
            " \"conversational_flow\": \"assessment\",\n"
            " \"semantic_relationships\": [\"relationship1\", \"relationship2\"]\n"
            " }},\n"
            " \"recommendations\": [\"recommendation 1\", \"recommendation 2\"]\n"
            "}}\n"
        )
| def optimize_content(self, content: str, analyze_only: bool = False, | |
| include_keywords: bool = True, optimization_type: str = "standard") -> Dict[str, Any]: | |
| """ | |
| Main content optimization function | |
| Args: | |
| content (str): Content to optimize | |
| analyze_only (bool): If True, only analyze without rewriting | |
| include_keywords (bool): Whether to include keyword analysis | |
| optimization_type (str): Type of optimization ("standard", "seo", "competitive") | |
| Returns: | |
| Dict: Optimization results with scores and enhanced content | |
| """ | |
| try: | |
| # Choose optimization approach | |
| if optimization_type == "seo": | |
| return self._seo_style_optimization(content, analyze_only) | |
| elif optimization_type == "competitive": | |
| return self._competitive_optimization(content) | |
| else: | |
| return self._standard_optimization(content, analyze_only, include_keywords) | |
| except Exception as e: | |
| return {'error': f"Optimization failed: {str(e)}"} | |
| def _standard_optimization(self, content: str, analyze_only: bool, include_keywords: bool) -> Dict[str, Any]: | |
| """Standard content optimization using enhancement prompt""" | |
| try: | |
| # Modify prompt based on options | |
| prompt_text = self.enhancement_prompt | |
| if analyze_only: | |
| prompt_text = prompt_text.replace( | |
| "Rewrite the text to improve:", | |
| "Analyze the text for potential improvements in:" | |
| ).replace( | |
| '"optimized_text": "..."', | |
| '"optimization_suggestions": ["suggestion 1", "suggestion 2"]' | |
| ) | |
| if not include_keywords: | |
| prompt_text = prompt_text.replace( | |
| '"keywords": ["example", "installation", "setup"],', | |
| '' | |
| ) | |
| # Create and run chain | |
| prompt_template = ChatPromptTemplate.from_messages([ | |
| SystemMessagePromptTemplate.from_template(prompt_text), | |
| HumanMessagePromptTemplate.from_template(content[:6000]) # Limit content length | |
| ]) | |
| # ("system", prompt_text), | |
| # ("user", content[:6000]) # Limit content length | |
| chain = prompt_template | self.llm | |
| result = chain.invoke({}) | |
| # Parse result | |
| result_content = result.content if hasattr(result, 'content') else str(result) | |
| parsed_result = self._parse_optimization_result(result_content) | |
| # Add metadata | |
| parsed_result.update({ | |
| 'optimization_type': 'standard', | |
| 'analyze_only': analyze_only, | |
| 'original_length': len(content), | |
| 'original_word_count': len(content.split()) | |
| }) | |
| return parsed_result | |
| except Exception as e: | |
| return {'error': f"Standard optimization failed: {str(e)}"} | |
| def _seo_style_optimization(self, content: str, analyze_only: bool) -> Dict[str, Any]: | |
| """SEO-focused optimization for AI search engines""" | |
| try: | |
| prompt_template = ChatPromptTemplate.from_messages([ | |
| ("system", self.seo_style_prompt), | |
| ("user", f"Optimize this content for AI search engines:\n\n{content[:6000]}") | |
| ]) | |
| chain = prompt_template | self.llm | |
| result = chain.invoke({}) | |
| result_content = result.content if hasattr(result, 'content') else str(result) | |
| parsed_result = self._parse_optimization_result(result_content) | |
| # Add SEO-specific metadata | |
| parsed_result.update({ | |
| 'optimization_type': 'seo', | |
| 'analyze_only': analyze_only, | |
| 'seo_focused': True | |
| }) | |
| return parsed_result | |
| except Exception as e: | |
| return {'error': f"SEO optimization failed: {str(e)}"} | |
| def _competitive_optimization(self, content: str) -> Dict[str, Any]: | |
| """Competitive analysis-based optimization""" | |
| try: | |
| formatted_prompt = self.competitive_analysis_prompt.format(content=content[:5000]) | |
| prompt_template = ChatPromptTemplate.from_messages([ | |
| ("system", formatted_prompt), | |
| ("user", "Perform the competitive analysis and provide optimization recommendations.") | |
| ]) | |
| chain = prompt_template | self.llm | |
| result = chain.invoke({}) | |
| result_content = result.content if hasattr(result, 'content') else str(result) | |
| parsed_result = self._parse_optimization_result(result_content) | |
| parsed_result.update({ | |
| 'optimization_type': 'competitive', | |
| 'competitive_analysis': True | |
| }) | |
| return parsed_result | |
| except Exception as e: | |
| return {'error': f"Competitive optimization failed: {str(e)}"} | |
| def batch_optimize_content(self, content_list: List[str], optimization_type: str = "standard") -> List[Dict[str, Any]]: | |
| """ | |
| Optimize multiple pieces of content in batch | |
| Args: | |
| content_list (List[str]): List of content pieces to optimize | |
| optimization_type (str): Type of optimization to apply | |
| Returns: | |
| List[Dict]: List of optimization results | |
| """ | |
| results = [] | |
| for i, content in enumerate(content_list): | |
| try: | |
| result = self.optimize_content( | |
| content, | |
| optimization_type=optimization_type | |
| ) | |
| result['batch_index'] = i | |
| results.append(result) | |
| except Exception as e: | |
| results.append({ | |
| 'batch_index': i, | |
| 'error': f"Batch optimization failed: {str(e)}" | |
| }) | |
| return results | |
| def generate_content_variations(self, content: str, num_variations: int = 3) -> List[Dict[str, Any]]: | |
| """ | |
| Generate multiple optimized variations of the same content | |
| Args: | |
| content (str): Original content | |
| num_variations (int): Number of variations to generate | |
| Returns: | |
| List[Dict]: List of content variations with analysis | |
| """ | |
| variations = [] | |
| variation_prompts = [ | |
| "Create a more conversational version optimized for AI chat responses", | |
| "Create a more authoritative version optimized for citations", | |
| "Create a more structured version optimized for question-answering" | |
| ] | |
| for i in range(min(num_variations, len(variation_prompts))): | |
| try: | |
| custom_prompt = f"""You are optimizing content for AI systems. {variation_prompts[i]}. | |
| Original content: {content[:4000]} | |
| Provide the optimized variation in JSON format: | |
| ```json | |
| {{ | |
| "variation_type": "conversational/authoritative/structured", | |
| "optimized_content": "the rewritten content...", | |
| "key_changes": ["change 1", "change 2"], | |
| "target_use_case": "description of ideal use case" | |
| }} | |
| ```""" | |
| prompt_template = ChatPromptTemplate.from_messages([ | |
| ("system", custom_prompt), | |
| ("user", "Generate the variation.") | |
| ]) | |
| chain = prompt_template | self.llm | |
| result = chain.invoke({}) | |
| result_content = result.content if hasattr(result, 'content') else str(result) | |
| parsed_result = self._parse_optimization_result(result_content) | |
| parsed_result.update({ | |
| 'variation_index': i, | |
| 'variation_prompt': variation_prompts[i] | |
| }) | |
| variations.append(parsed_result) | |
| except Exception as e: | |
| variations.append({ | |
| 'variation_index': i, | |
| 'error': f"Variation generation failed: {str(e)}" | |
| }) | |
| return variations | |
| def analyze_content_readability(self, content: str) -> Dict[str, Any]: | |
| """ | |
| Analyze content readability for AI systems | |
| Args: | |
| content (str): Content to analyze | |
| Returns: | |
| Dict: Readability analysis results | |
| """ | |
| try: | |
| # Basic readability metrics | |
| words = content.split() | |
| sentences = re.split(r'[.!?]+', content) | |
| sentences = [s.strip() for s in sentences if s.strip()] | |
| paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()] | |
| # Calculate metrics | |
| avg_words_per_sentence = len(words) / len(sentences) if sentences else 0 | |
| avg_sentences_per_paragraph = len(sentences) / len(paragraphs) if paragraphs else 0 | |
| # Character-based metrics | |
| avg_word_length = sum(len(word) for word in words) / len(words) if words else 0 | |
| # Complexity indicators | |
| long_sentences = [s for s in sentences if len(s.split()) > 20] | |
| complex_words = [w for w in words if len(w) > 6] | |
| return { | |
| 'basic_metrics': { | |
| 'total_words': len(words), | |
| 'total_sentences': len(sentences), | |
| 'total_paragraphs': len(paragraphs), | |
| 'avg_words_per_sentence': avg_words_per_sentence, | |
| 'avg_sentences_per_paragraph': avg_sentences_per_paragraph, | |
| 'avg_word_length': avg_word_length | |
| }, | |
| 'complexity_indicators': { | |
| 'long_sentences_count': len(long_sentences), | |
| 'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0, | |
| 'complex_words_count': len(complex_words), | |
| 'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0 | |
| }, | |
| 'ai_readability_score': self._calculate_ai_readability_score({ | |
| 'avg_words_per_sentence': avg_words_per_sentence, | |
| 'avg_word_length': avg_word_length, | |
| 'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0 | |
| }), | |
| 'recommendations': self._generate_readability_recommendations({ | |
| 'avg_words_per_sentence': avg_words_per_sentence, | |
| 'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0, | |
| 'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0 | |
| }) | |
| } | |
| except Exception as e: | |
| return {'error': f"Readability analysis failed: {str(e)}"} | |
| def extract_key_entities(self, content: str) -> Dict[str, Any]: | |
| """ | |
| Extract key entities and topics for optimization | |
| Args: | |
| content (str): Content to analyze | |
| Returns: | |
| Dict: Extracted entities and topics | |
| """ | |
| try: | |
| entity_prompt = """Extract key entities, topics, and concepts from this content for AI optimization. | |
| Content: {content} | |
| Identify: | |
| 1. Named entities (people, places, organizations) | |
| 2. Key concepts and topics | |
| 3. Technical terms and jargon | |
| 4. Potential semantic keywords | |
| 5. Question-answer opportunities | |
| Format as JSON: | |
| ```json | |
| {{ | |
| "named_entities": ["entity1", "entity2"], | |
| "key_topics": ["topic1", "topic2"], | |
| "technical_terms": ["term1", "term2"], | |
| "semantic_keywords": ["keyword1", "keyword2"], | |
| "question_opportunities": ["What is...", "How does..."], | |
| "entity_relationships": ["relationship descriptions"] | |
| }} | |
| ```""" | |
| prompt_template = ChatPromptTemplate.from_messages([ | |
| ("system", entity_prompt.format(content=content[:5000])), | |
| ("user", "Extract the entities and topics.") | |
| ]) | |
| chain = prompt_template | self.llm | |
| result = chain.invoke({}) | |
| result_content = result.content if hasattr(result, 'content') else str(result) | |
| return self._parse_optimization_result(result_content) | |
| except Exception as e: | |
| return {'error': f"Entity extraction failed: {str(e)}"} | |
| def optimize_for_voice_search(self, content: str) -> Dict[str, Any]: | |
| """ | |
| Optimize content specifically for voice search and conversational AI | |
| Args: | |
| content (str): Content to optimize | |
| Returns: | |
| Dict: Voice search optimization results | |
| """ | |
| try: | |
| voice_prompt = """Optimize this content for voice search and conversational AI systems. | |
| Focus on: | |
| 1. Natural language patterns | |
| 2. Question-based structure | |
| 3. Conversational tone | |
| 4. Clear, direct answers | |
| 5. Featured snippet optimization | |
| Original content: {content} | |
| Provide optimization in JSON: | |
| ```json | |
| {{ | |
| "voice_optimized_content": "conversational version...", | |
| "question_answer_pairs": [ | |
| {{"question": "What is...", "answer": "Direct answer..."}}, | |
| {{"question": "How does...", "answer": "Step by step..."}} | |
| ], | |
| "featured_snippet_candidates": ["snippet 1", "snippet 2"], | |
| "natural_language_improvements": ["improvement 1", "improvement 2"], | |
| "conversational_score": 8.5 | |
| }} | |
| ```""" | |
| prompt_template = ChatPromptTemplate.from_messages([ | |
| ("system", voice_prompt.format(content=content[:4000])), | |
| ("user", "Optimize for voice search.") | |
| ]) | |
| chain = prompt_template | self.llm | |
| result = chain.invoke({}) | |
| result_content = result.content if hasattr(result, 'content') else str(result) | |
| parsed_result = self._parse_optimization_result(result_content) | |
| parsed_result.update({ | |
| 'optimization_type': 'voice_search', | |
| 'voice_optimized': True | |
| }) | |
| return parsed_result | |
| except Exception as e: | |
| return {'error': f"Voice search optimization failed: {str(e)}"} | |
| def _parse_optimization_result(self, response_text: str) -> Dict[str, Any]: | |
| """Parse LLM response and extract structured results""" | |
| try: | |
| # Find JSON content in the response | |
| json_start = response_text.find('{') | |
| json_end = response_text.rfind('}') + 1 | |
| if json_start != -1 and json_end != -1: | |
| json_str = response_text[json_start:json_end] | |
| parsed = json.loads(json_str) | |
| # Ensure consistent structure | |
| if 'scores' not in parsed and 'score' in parsed: | |
| parsed['scores'] = parsed['score'] | |
| return parsed | |
| else: | |
| # If no JSON found, return raw response with error flag | |
| return { | |
| 'raw_response': response_text, | |
| 'parsing_error': 'No JSON structure found in response', | |
| 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0} | |
| } | |
| except json.JSONDecodeError as e: | |
| return { | |
| 'raw_response': response_text, | |
| 'parsing_error': f'JSON decode error: {str(e)}', | |
| 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0} | |
| } | |
| except Exception as e: | |
| return { | |
| 'raw_response': response_text, | |
| 'parsing_error': f'Unexpected parsing error: {str(e)}', | |
| 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0} | |
| } | |
| def _calculate_ai_readability_score(self, metrics: Dict[str, float]) -> float: | |
| """Calculate AI-specific readability score""" | |
| try: | |
| # Optimal ranges for AI consumption | |
| optimal_words_per_sentence = 15 # Sweet spot for AI processing | |
| optimal_word_length = 5 # Balance of complexity and clarity | |
| optimal_complex_words_percentage = 15 # Some complexity is good for authority | |
| # Calculate deviations from optimal | |
| sentence_score = max(0, 10 - abs(metrics['avg_words_per_sentence'] - optimal_words_per_sentence) * 0.5) | |
| word_length_score = max(0, 10 - abs(metrics['avg_word_length'] - optimal_word_length) * 2) | |
| complexity_score = max(0, 10 - abs(metrics['complex_words_percentage'] - optimal_complex_words_percentage) * 0.3) | |
| # Weighted average | |
| overall_score = (sentence_score * 0.4 + word_length_score * 0.3 + complexity_score * 0.3) | |
| return round(overall_score, 1) | |
| except Exception: | |
| return 5.0 # Default neutral score | |
| def _generate_readability_recommendations(self, metrics: Dict[str, float]) -> List[str]: | |
| """Generate specific readability improvement recommendations""" | |
| recommendations = [] | |
| try: | |
| if metrics['avg_words_per_sentence'] > 20: | |
| recommendations.append("Break down long sentences for better AI processing") | |
| elif metrics['avg_words_per_sentence'] < 8: | |
| recommendations.append("Consider combining very short sentences for better context") | |
| if metrics['long_sentences_percentage'] > 30: | |
| recommendations.append("Reduce the number of complex sentences (>20 words)") | |
| if metrics['complex_words_percentage'] > 25: | |
| recommendations.append("Simplify vocabulary where possible for broader accessibility") | |
| elif metrics['complex_words_percentage'] < 5: | |
| recommendations.append("Add more specific terminology to establish authority") | |
| return recommendations | |
| except Exception: | |
| return ["Unable to generate specific recommendations"] |