Spaces:

MBilal-72
/

GenerativeEngineOptimization

Runtime error

File size: 23,532 Bytes

"""
Content Optimization Module
Enhances content for better AI/LLM performance and GEO scores
"""

import json
import re
from typing import Dict, Any, List, Optional
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate


class ContentOptimizer:
    """Main class for optimizing content for AI search engines"""

    def __init__(self, llm):
        """Store the language model and build the prompt strings.

        Args:
            llm: Model object that the optimization methods pipe their chat
                prompts into (``prompt_template | self.llm``).
        """
        self.llm = llm
        # Populates enhancement_prompt, seo_style_prompt and
        # competitive_analysis_prompt on this instance.
        self.setup_prompts()

    def setup_prompts(self):
        """Initialize the prompt strings used by the optimization methods.

        Sets three instance attributes:
            enhancement_prompt: system prompt for the standard optimization.
            seo_style_prompt: system prompt for the SEO-style optimization.
            competitive_analysis_prompt: prompt containing a ``{content}``
                placeholder for the competitive analysis.

        All three are later handed to prompt templating / ``str.format``, so
        the JSON examples use doubled braces (``{{`` / ``}}``) to stand for
        literal braces.
        """

        # Main content enhancement prompt
        # NOTE: _standard_optimization rewrites this text with str.replace()
        # using the exact substrings "Rewrite the text to improve:",
        # '"optimized_text": "..."' and the '"keywords": [...]' line below;
        # keep those in sync when editing the wording.
        self.enhancement_prompt = (
            "You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.\n\n"
            "Evaluate the input text based on the following criteria, assigning a score from 1-10 for each:\n"
            "- Clarity: How easily can the content be understood?\n"
            "- Structuredness: How well-organized and coherent is the content?\n"
            "- LLM Answerability: How easily can an LLM extract precise answers from the content?\n\n"
            "Identify the most salient keywords.\n\n"
            "Rewrite the text to improve:\n"
            "- Clarity and precision\n"
            "- Logical structure and flow\n"
            "- Suitability for LLM-based information retrieval\n\n"
            "Present your analysis and optimized text in the following JSON format:\n"
            "```json\n"
            "{{\n"
            "  \"scores\": {{\n"
            "    \"clarity\": 8.5,\n"
            "    \"structuredness\": 7.0,\n"
            "    \"answerability\": 9.0\n"
            "  }},\n"
            "  \"keywords\": [\"example\", \"installation\", \"setup\"],\n"
            "  \"optimized_text\": \"...\"\n"
            "}}\n"
            "```"
        )

        # SEO-style optimization prompt
        # Has no {content} placeholder: the content is supplied separately in
        # the user message by _seo_style_optimization.
        self.seo_style_prompt = (
            "You are an AI-first SEO specialist. Optimize this content for AI search engines and LLM systems. "
            "Focus on:\n"
            "1. Semantic keyword optimization\n"
            "2. Question-answer format enhancement\n"
            "3. Factual accuracy and authority signals\n"
            "4. Conversational readiness\n"
            "5. Citation-worthy structure\n"
            "Provide analysis and optimization in JSON:\n"
            "```json\n"
            "{{\n"
            "  \"seo_analysis\": {{\n"
            "    \"keyword_density\": \"analysis of current keywords\",\n"
            "    \"semantic_gaps\": [\"missing semantic terms\"],\n"
            "    \"readability_score\": 8.5,\n"
            "    \"authority_signals\": [\"credentials\", \"citations\"]\n"
            "  }},\n"
            "  \"optimized_content\": {{\n"
            "    \"title_suggestions\": [\"optimized title 1\", \"optimized title 2\"],\n"
            "    \"meta_description\": \"AI-optimized meta description\",\n"
            "    \"enhanced_content\": \"full optimized content...\",\n"
            "    \"structured_data_suggestions\": [\"schema markup recommendations\"]\n"
            "  }},\n"
            "  \"improvement_summary\": {{\n"
            "    \"changes_made\": [\"change 1\", \"change 2\"],\n"
            "    \"expected_impact\": \"description of expected improvements\"\n"
            "  }}\n"
            "}}\n"
            "```"
        )

        # Competitive content analysis prompt
        # The single-brace {content} below is the only real placeholder in
        # this string; unlike the two prompts above, the JSON example here is
        # not wrapped in a ```json fence.
        self.competitive_analysis_prompt = (
            "Compare this content against best practices for AI search optimization. Identify gaps and opportunities.\n"
            "Original Content: {content}\n"
            "Analyze against these AI search factors:\n"
            "- Entity recognition and linking\n"
            "- Question coverage completeness\n"
            "- Factual statement clarity\n"
            "- Conversational flow\n"
            "- Semantic relationship mapping\n\n"
            "Provide competitive analysis in JSON format with specific recommendations:\n"
            "{{\n"
            "  \"competitive_analysis\": {{\n"
            "    \"entity_gaps\": [\"gap1\", \"gap2\"],\n"
            "    \"question_coverage\": \"summary of coverage\",\n"
            "    \"factual_clarity\": \"assessment\",\n"
            "    \"conversational_flow\": \"assessment\",\n"
            "    \"semantic_relationships\": [\"relationship1\", \"relationship2\"]\n"
            "  }},\n"
            "  \"recommendations\": [\"recommendation 1\", \"recommendation 2\"]\n"
            "}}\n"
        )
    
    def optimize_content(self, content: str, analyze_only: bool = False, 
                        include_keywords: bool = True, optimization_type: str = "standard") -> Dict[str, Any]:
        """
            Main content optimization function
            Args:
                content (str): Content to optimize
                analyze_only (bool): If True, only analyze without rewriting
                include_keywords (bool): Whether to include keyword analysis
                optimization_type (str): Type of optimization ("standard", "seo", "competitive")  
            Returns:
                Dict: Optimization results with scores and enhanced content
        """
        try:
            # Choose optimization approach
            if optimization_type == "seo":
                return self._seo_style_optimization(content, analyze_only)
            elif optimization_type == "competitive":
                return self._competitive_optimization(content)
            else:
                return self._standard_optimization(content, analyze_only, include_keywords)
                
        except Exception as e:
            return {'error': f"Optimization failed: {str(e)}"}
    
    def _standard_optimization(self, content: str, analyze_only: bool, include_keywords: bool) -> Dict[str, Any]:
        """Run the standard enhancement prompt against ``content``.

        The content is supplied to the chat prompt as the ``content`` input
        variable instead of being compiled as a template itself, so braces
        inside the content (code samples, JSON, CSS, ...) reach the model
        verbatim rather than being parsed as template placeholders.

        Args:
            content (str): Text to optimize; only the first 6000 characters
                are sent to the model.
            analyze_only (bool): If True, the prompt asks for improvement
                suggestions instead of a rewritten text.
            include_keywords (bool): If False, both the keyword instruction
                and the ``keywords`` field of the example JSON are removed
                from the prompt.

        Returns:
            Dict: Parsed model response extended with ``optimization_type``,
            ``analyze_only``, ``original_length`` and ``original_word_count``;
            or ``{'error': ...}`` if any step raises.
        """
        try:
            # Modify prompt based on options
            prompt_text = self.enhancement_prompt

            if analyze_only:
                prompt_text = prompt_text.replace(
                    "Rewrite the text to improve:",
                    "Analyze the text for potential improvements in:"
                ).replace(
                    '"optimized_text": "..."',
                    '"optimization_suggestions": ["suggestion 1", "suggestion 2"]'
                )

            if not include_keywords:
                # Drop the instruction as well as the example field; leaving
                # the instruction in would still ask the model for keywords.
                prompt_text = prompt_text.replace(
                    "Identify the most salient keywords.\n\n",
                    ''
                ).replace(
                    '  "keywords": ["example", "installation", "setup"],\n',
                    ''
                )

            # The human message is a fixed one-variable template; the actual
            # content is bound at invoke time and never parsed for braces.
            prompt_template = ChatPromptTemplate.from_messages([
                SystemMessagePromptTemplate.from_template(prompt_text),
                HumanMessagePromptTemplate.from_template("{content}"),
            ])

            chain = prompt_template | self.llm
            result = chain.invoke({"content": content[:6000]})  # Limit content length

            # Parse result
            result_content = result.content if hasattr(result, 'content') else str(result)
            parsed_result = self._parse_optimization_result(result_content)

            # Add metadata
            parsed_result.update({
                'optimization_type': 'standard',
                'analyze_only': analyze_only,
                'original_length': len(content),
                'original_word_count': len(content.split())
            })

            return parsed_result

        except Exception as e:
            return {'error': f"Standard optimization failed: {str(e)}"}
    
    def _seo_style_optimization(self, content: str, analyze_only: bool) -> Dict[str, Any]:
        """SEO-focused optimization for AI search engines.

        The content is bound to the ``{content}`` variable of the user message
        at invoke time. Interpolating it into the message text beforehand
        would make any brace in the content part of the template and break
        prompt formatting.

        Args:
            content (str): Text to optimize; only the first 6000 characters
                are sent to the model.
            analyze_only (bool): Recorded in the result metadata only; the SEO
                prompt itself is not altered by this flag.

        Returns:
            Dict: Parsed model response extended with ``optimization_type``,
            ``analyze_only`` and ``seo_focused``; or ``{'error': ...}`` if any
            step raises.
        """
        try:
            prompt_template = ChatPromptTemplate.from_messages([
                ("system", self.seo_style_prompt),
                ("user", "Optimize this content for AI search engines:\n\n{content}")
            ])

            chain = prompt_template | self.llm
            result = chain.invoke({"content": content[:6000]})

            result_content = result.content if hasattr(result, 'content') else str(result)
            parsed_result = self._parse_optimization_result(result_content)

            # Add SEO-specific metadata
            parsed_result.update({
                'optimization_type': 'seo',
                'analyze_only': analyze_only,
                'seo_focused': True
            })

            return parsed_result

        except Exception as e:
            return {'error': f"SEO optimization failed: {str(e)}"}
    
    def _competitive_optimization(self, content: str) -> Dict[str, Any]:
        """Competitive analysis-based optimization.

        ``competitive_analysis_prompt`` is used directly as the system
        template: its ``{content}`` placeholder is filled at invoke time and
        its doubled braces render as the literal braces of the JSON example.
        Pre-formatting the prompt with ``str.format`` would un-escape those
        braces, and the template parser would then treat the JSON example as
        placeholders.

        Args:
            content (str): Text to analyze; only the first 5000 characters
                are sent to the model.

        Returns:
            Dict: Parsed model response extended with ``optimization_type``
            and ``competitive_analysis``; or ``{'error': ...}`` if any step
            raises.
        """
        try:
            prompt_template = ChatPromptTemplate.from_messages([
                ("system", self.competitive_analysis_prompt),
                ("user", "Perform the competitive analysis and provide optimization recommendations.")
            ])

            chain = prompt_template | self.llm
            result = chain.invoke({"content": content[:5000]})

            result_content = result.content if hasattr(result, 'content') else str(result)
            parsed_result = self._parse_optimization_result(result_content)

            parsed_result.update({
                'optimization_type': 'competitive',
                'competitive_analysis': True
            })

            return parsed_result

        except Exception as e:
            return {'error': f"Competitive optimization failed: {str(e)}"}
    
    def batch_optimize_content(self, content_list: List[str], optimization_type: str = "standard") -> List[Dict[str, Any]]:
        """
        Optimize multiple pieces of content in batch
        
        Args:
            content_list (List[str]): List of content pieces to optimize
            optimization_type (str): Type of optimization to apply
            
        Returns:
            List[Dict]: List of optimization results
        """
        results = []
        
        for i, content in enumerate(content_list):
            try:
                result = self.optimize_content(
                    content, 
                    optimization_type=optimization_type
                )
                result['batch_index'] = i
                results.append(result)
                
            except Exception as e:
                results.append({
                    'batch_index': i,
                    'error': f"Batch optimization failed: {str(e)}"
                })
        
        return results
    
    def generate_content_variations(self, content: str, num_variations: int = 3) -> List[Dict[str, Any]]:
        """
        Generate multiple optimized variations of the same content

        Three variation styles are available (conversational, authoritative,
        structured), so at most three variations are produced.

        The system prompt is a real template: the variation instruction and
        the content are bound as input variables at invoke time, and the JSON
        example keeps its escaped braces. Building the prompt with an f-string
        first would un-escape those braces and embed raw content, both of
        which the template parser would then misread as placeholders.

        Args:
            content (str): Original content; only the first 4000 characters
                are sent to the model
            num_variations (int): Number of variations to generate (capped at
                the number of available styles)

        Returns:
            List[Dict]: One parsed result per variation, each extended with
            ``variation_index`` and ``variation_prompt``; a variation that
            fails yields ``{'variation_index': i, 'error': ...}``
        """
        variations = []

        variation_prompts = [
            "Create a more conversational version optimized for AI chat responses",
            "Create a more authoritative version optimized for citations",
            "Create a more structured version optimized for question-answering"
        ]

        # {instruction} and {content} are the only placeholders; {{ and }}
        # render as the literal braces of the JSON example.
        system_template = (
            "You are optimizing content for AI systems. {instruction}.\n\n"
            "Original content: {content}\n\n"
            "Provide the optimized variation in JSON format:\n"
            "```json\n"
            "{{\n"
            "\"variation_type\": \"conversational/authoritative/structured\",\n"
            "\"optimized_content\": \"the rewritten content...\",\n"
            "\"key_changes\": [\"change 1\", \"change 2\"],\n"
            "\"target_use_case\": \"description of ideal use case\"\n"
            "}}\n"
            "```"
        )

        for i in range(min(num_variations, len(variation_prompts))):
            try:
                prompt_template = ChatPromptTemplate.from_messages([
                    ("system", system_template),
                    ("user", "Generate the variation.")
                ])

                chain = prompt_template | self.llm
                result = chain.invoke({
                    "instruction": variation_prompts[i],
                    "content": content[:4000]
                })

                result_content = result.content if hasattr(result, 'content') else str(result)
                parsed_result = self._parse_optimization_result(result_content)

                parsed_result.update({
                    'variation_index': i,
                    'variation_prompt': variation_prompts[i]
                })

                variations.append(parsed_result)

            except Exception as e:
                variations.append({
                    'variation_index': i,
                    'error': f"Variation generation failed: {str(e)}"
                })

        return variations
    
    def analyze_content_readability(self, content: str) -> Dict[str, Any]:
        """
        Analyze content readability for AI systems
        
        Args:
            content (str): Content to analyze
            
        Returns:
            Dict: Readability analysis results
        """
        try:
            # Basic readability metrics
            words = content.split()
            sentences = re.split(r'[.!?]+', content)
            sentences = [s.strip() for s in sentences if s.strip()]
            
            paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]
            
            # Calculate metrics
            avg_words_per_sentence = len(words) / len(sentences) if sentences else 0
            avg_sentences_per_paragraph = len(sentences) / len(paragraphs) if paragraphs else 0
            
            # Character-based metrics
            avg_word_length = sum(len(word) for word in words) / len(words) if words else 0
            
            # Complexity indicators
            long_sentences = [s for s in sentences if len(s.split()) > 20]
            complex_words = [w for w in words if len(w) > 6]
            
            return {
                'basic_metrics': {
                    'total_words': len(words),
                    'total_sentences': len(sentences),
                    'total_paragraphs': len(paragraphs),
                    'avg_words_per_sentence': avg_words_per_sentence,
                    'avg_sentences_per_paragraph': avg_sentences_per_paragraph,
                    'avg_word_length': avg_word_length
                },
                'complexity_indicators': {
                    'long_sentences_count': len(long_sentences),
                    'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
                    'complex_words_count': len(complex_words),
                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
                },
                'ai_readability_score': self._calculate_ai_readability_score({
                    'avg_words_per_sentence': avg_words_per_sentence,
                    'avg_word_length': avg_word_length,
                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
                }),
                'recommendations': self._generate_readability_recommendations({
                    'avg_words_per_sentence': avg_words_per_sentence,
                    'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
                })
            }
            
        except Exception as e:
            return {'error': f"Readability analysis failed: {str(e)}"}
    
    def extract_key_entities(self, content: str) -> Dict[str, Any]:
        """
        Extract key entities and topics for optimization
        
        Args:
            content (str): Content to analyze
            
        Returns:
            Dict: Extracted entities and topics
        """
        try:
            entity_prompt = """Extract key entities, topics, and concepts from this content for AI optimization.

Content: {content}

Identify:
1. Named entities (people, places, organizations)
2. Key concepts and topics
3. Technical terms and jargon
4. Potential semantic keywords
5. Question-answer opportunities

Format as JSON:
```json
{{
"named_entities": ["entity1", "entity2"],
"key_topics": ["topic1", "topic2"],
"technical_terms": ["term1", "term2"],
"semantic_keywords": ["keyword1", "keyword2"],
"question_opportunities": ["What is...", "How does..."],
"entity_relationships": ["relationship descriptions"]
}}
```"""
            
            prompt_template = ChatPromptTemplate.from_messages([
                ("system", entity_prompt.format(content=content[:5000])),
                ("user", "Extract the entities and topics.")
            ])
            
            chain = prompt_template | self.llm
            result = chain.invoke({})
            
            result_content = result.content if hasattr(result, 'content') else str(result)
            return self._parse_optimization_result(result_content)
            
        except Exception as e:
            return {'error': f"Entity extraction failed: {str(e)}"}
    
    def optimize_for_voice_search(self, content: str) -> Dict[str, Any]:
        """
        Optimize content specifically for voice search and conversational AI
        
        Args:
            content (str): Content to optimize
            
        Returns:
            Dict: Voice search optimization results
        """
        try:
            voice_prompt = """Optimize this content for voice search and conversational AI systems.

Focus on:
1. Natural language patterns
2. Question-based structure
3. Conversational tone
4. Clear, direct answers
5. Featured snippet optimization

Original content: {content}

Provide optimization in JSON:
```json
{{
"voice_optimized_content": "conversational version...",
"question_answer_pairs": [
  {{"question": "What is...", "answer": "Direct answer..."}},
  {{"question": "How does...", "answer": "Step by step..."}}
],
"featured_snippet_candidates": ["snippet 1", "snippet 2"],
"natural_language_improvements": ["improvement 1", "improvement 2"],
"conversational_score": 8.5
}}
```"""
            
            prompt_template = ChatPromptTemplate.from_messages([
                ("system", voice_prompt.format(content=content[:4000])),
                ("user", "Optimize for voice search.")
            ])
            
            chain = prompt_template | self.llm
            result = chain.invoke({})
            
            result_content = result.content if hasattr(result, 'content') else str(result)
            parsed_result = self._parse_optimization_result(result_content)
            
            parsed_result.update({
                'optimization_type': 'voice_search',
                'voice_optimized': True
            })
            
            return parsed_result
            
        except Exception as e:
            return {'error': f"Voice search optimization failed: {str(e)}"}
    
    def _parse_optimization_result(self, response_text: str) -> Dict[str, Any]:
        """Parse LLM response and extract structured results"""
        try:
            # Find JSON content in the response
            json_start = response_text.find('{')
            json_end = response_text.rfind('}') + 1
            
            if json_start != -1 and json_end != -1:
                json_str = response_text[json_start:json_end]
                parsed = json.loads(json_str)
                
                # Ensure consistent structure
                if 'scores' not in parsed and 'score' in parsed:
                    parsed['scores'] = parsed['score']
                
                return parsed
            else:
                # If no JSON found, return raw response with error flag
                return {
                    'raw_response': response_text,
                    'parsing_error': 'No JSON structure found in response',
                    'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
                }
                
        except json.JSONDecodeError as e:
            return {
                'raw_response': response_text,
                'parsing_error': f'JSON decode error: {str(e)}',
                'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
            }
        except Exception as e:
            return {
                'raw_response': response_text,
                'parsing_error': f'Unexpected parsing error: {str(e)}',
                'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
            }
    
    def _calculate_ai_readability_score(self, metrics: Dict[str, float]) -> float:
        """Calculate AI-specific readability score"""
        try:
            # Optimal ranges for AI consumption
            optimal_words_per_sentence = 15  # Sweet spot for AI processing
            optimal_word_length = 5  # Balance of complexity and clarity
            optimal_complex_words_percentage = 15  # Some complexity is good for authority
            
            # Calculate deviations from optimal
            sentence_score = max(0, 10 - abs(metrics['avg_words_per_sentence'] - optimal_words_per_sentence) * 0.5)
            word_length_score = max(0, 10 - abs(metrics['avg_word_length'] - optimal_word_length) * 2)
            complexity_score = max(0, 10 - abs(metrics['complex_words_percentage'] - optimal_complex_words_percentage) * 0.3)
            
            # Weighted average
            overall_score = (sentence_score * 0.4 + word_length_score * 0.3 + complexity_score * 0.3)
            
            return round(overall_score, 1)
            
        except Exception:
            return 5.0  # Default neutral score
    
    def _generate_readability_recommendations(self, metrics: Dict[str, float]) -> List[str]:
        """Generate specific readability improvement recommendations"""
        recommendations = []
        
        try:
            if metrics['avg_words_per_sentence'] > 20:
                recommendations.append("Break down long sentences for better AI processing")
            elif metrics['avg_words_per_sentence'] < 8:
                recommendations.append("Consider combining very short sentences for better context")
            
            if metrics['long_sentences_percentage'] > 30:
                recommendations.append("Reduce the number of complex sentences (>20 words)")
            
            if metrics['complex_words_percentage'] > 25:
                recommendations.append("Simplify vocabulary where possible for broader accessibility")
            elif metrics['complex_words_percentage'] < 5:
                recommendations.append("Add more specific terminology to establish authority")
            
            return recommendations
            
        except Exception:
            return ["Unable to generate specific recommendations"]