|
|
""" |
|
|
Content Optimization Module |
|
|
Enhances content for better AI/LLM performance and GEO scores |
|
|
""" |
|
|
|
|
|
import json |
|
|
import re |
|
|
from typing import Dict, Any, List, Optional |
|
|
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate |
|
|
|
|
|
|
|
|
class ContentOptimizer:
    """Main class for optimizing content for AI search engines"""

    def __init__(self, llm):
        """Prepare the prompt templates and store the language model handle.

        Args:
            llm: A LangChain-compatible chat model used by every
                optimization method on this class.
        """
        # The prompt templates are plain strings that do not depend on the
        # model, so they can be built before the model reference is stored.
        self.setup_prompts()
        self.llm = llm
|
|
|
|
|
    def setup_prompts(self):
        """Initialize optimization prompts.

        Defines the three prompt strings consumed by the optimization
        methods:

        - ``enhancement_prompt``: standard optimization (scores, keywords,
          rewritten text) used by ``_standard_optimization``.
        - ``seo_style_prompt``: AI-SEO analysis and rewrite used by
          ``_seo_style_optimization``.
        - ``competitive_analysis_prompt``: gap analysis used by
          ``_competitive_optimization``; contains a ``{content}``
          placeholder filled in later.

        NOTE: literal JSON braces are doubled (``{{`` / ``}}``) because
        these strings are later run through brace-based formatting
        (ChatPromptTemplate templating or ``str.format``).
        """

        # System prompt for the standard path: asks the model for 1-10
        # scores, salient keywords and a rewritten text, returned as JSON.
        self.enhancement_prompt = (
            "You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.\n\n"
            "Evaluate the input text based on the following criteria, assigning a score from 1-10 for each:\n"
            "- Clarity: How easily can the content be understood?\n"
            "- Structuredness: How well-organized and coherent is the content?\n"
            "- LLM Answerability: How easily can an LLM extract precise answers from the content?\n\n"
            "Identify the most salient keywords.\n\n"
            "Rewrite the text to improve:\n"
            "- Clarity and precision\n"
            "- Logical structure and flow\n"
            "- Suitability for LLM-based information retrieval\n\n"
            "Present your analysis and optimized text in the following JSON format:\n"
            "```json\n"
            "{{\n"
            "  \"scores\": {{\n"
            "    \"clarity\": 8.5,\n"
            "    \"structuredness\": 7.0,\n"
            "    \"answerability\": 9.0\n"
            "  }},\n"
            "  \"keywords\": [\"example\", \"installation\", \"setup\"],\n"
            "  \"optimized_text\": \"...\"\n"
            "}}\n"
            "```"
        )

        # System prompt for the SEO path: requests an SEO analysis plus
        # optimized content and an improvement summary, returned as JSON.
        self.seo_style_prompt = (
            "You are an AI-first SEO specialist. Optimize this content for AI search engines and LLM systems. "
            "Focus on:\n"
            "1. Semantic keyword optimization\n"
            "2. Question-answer format enhancement\n"
            "3. Factual accuracy and authority signals\n"
            "4. Conversational readiness\n"
            "5. Citation-worthy structure\n"
            "Provide analysis and optimization in JSON:\n"
            "```json\n"
            "{{\n"
            "  \"seo_analysis\": {{\n"
            "    \"keyword_density\": \"analysis of current keywords\",\n"
            "    \"semantic_gaps\": [\"missing semantic terms\"],\n"
            "    \"readability_score\": 8.5,\n"
            "    \"authority_signals\": [\"credentials\", \"citations\"]\n"
            "  }},\n"
            "  \"optimized_content\": {{\n"
            "    \"title_suggestions\": [\"optimized title 1\", \"optimized title 2\"],\n"
            "    \"meta_description\": \"AI-optimized meta description\",\n"
            "    \"enhanced_content\": \"full optimized content...\",\n"
            "    \"structured_data_suggestions\": [\"schema markup recommendations\"]\n"
            "  }},\n"
            "  \"improvement_summary\": {{\n"
            "    \"changes_made\": [\"change 1\", \"change 2\"],\n"
            "    \"expected_impact\": \"description of expected improvements\"\n"
            "  }}\n"
            "}}\n"
            "```"
        )

        # Prompt for the competitive path; ``{content}`` is substituted by
        # _competitive_optimization when the prompt is rendered.
        self.competitive_analysis_prompt = (
            "Compare this content against best practices for AI search optimization. Identify gaps and opportunities.\n"
            "Original Content: {content}\n"
            "Analyze against these AI search factors:\n"
            "- Entity recognition and linking\n"
            "- Question coverage completeness\n"
            "- Factual statement clarity\n"
            "- Conversational flow\n"
            "- Semantic relationship mapping\n\n"
            "Provide competitive analysis in JSON format with specific recommendations:\n"
            "{{\n"
            "  \"competitive_analysis\": {{\n"
            "    \"entity_gaps\": [\"gap1\", \"gap2\"],\n"
            "    \"question_coverage\": \"summary of coverage\",\n"
            "    \"factual_clarity\": \"assessment\",\n"
            "    \"conversational_flow\": \"assessment\",\n"
            "    \"semantic_relationships\": [\"relationship1\", \"relationship2\"]\n"
            "  }},\n"
            "  \"recommendations\": [\"recommendation 1\", \"recommendation 2\"]\n"
            "}}\n"
        )
|
|
|
|
|
def optimize_content(self, content: str, analyze_only: bool = False, |
|
|
include_keywords: bool = True, optimization_type: str = "standard") -> Dict[str, Any]: |
|
|
""" |
|
|
Main content optimization function |
|
|
Args: |
|
|
content (str): Content to optimize |
|
|
analyze_only (bool): If True, only analyze without rewriting |
|
|
include_keywords (bool): Whether to include keyword analysis |
|
|
optimization_type (str): Type of optimization ("standard", "seo", "competitive") |
|
|
Returns: |
|
|
Dict: Optimization results with scores and enhanced content |
|
|
""" |
|
|
try: |
|
|
|
|
|
if optimization_type == "seo": |
|
|
return self._seo_style_optimization(content, analyze_only) |
|
|
elif optimization_type == "competitive": |
|
|
return self._competitive_optimization(content) |
|
|
else: |
|
|
return self._standard_optimization(content, analyze_only, include_keywords) |
|
|
|
|
|
except Exception as e: |
|
|
return {'error': f"Optimization failed: {str(e)}"} |
|
|
|
|
|
def _standard_optimization(self, content: str, analyze_only: bool, include_keywords: bool) -> Dict[str, Any]: |
|
|
"""Standard content optimization using enhancement prompt""" |
|
|
try: |
|
|
|
|
|
prompt_text = self.enhancement_prompt |
|
|
|
|
|
if analyze_only: |
|
|
prompt_text = prompt_text.replace( |
|
|
"Rewrite the text to improve:", |
|
|
"Analyze the text for potential improvements in:" |
|
|
).replace( |
|
|
'"optimized_text": "..."', |
|
|
'"optimization_suggestions": ["suggestion 1", "suggestion 2"]' |
|
|
) |
|
|
|
|
|
if not include_keywords: |
|
|
prompt_text = prompt_text.replace( |
|
|
'"keywords": ["example", "installation", "setup"],', |
|
|
'' |
|
|
) |
|
|
|
|
|
|
|
|
prompt_template = ChatPromptTemplate.from_messages([ |
|
|
SystemMessagePromptTemplate.from_template(prompt_text), |
|
|
HumanMessagePromptTemplate.from_template(content[:6000]) |
|
|
]) |
|
|
|
|
|
|
|
|
|
|
|
chain = prompt_template | self.llm |
|
|
result = chain.invoke({}) |
|
|
|
|
|
|
|
|
result_content = result.content if hasattr(result, 'content') else str(result) |
|
|
parsed_result = self._parse_optimization_result(result_content) |
|
|
|
|
|
|
|
|
parsed_result.update({ |
|
|
'optimization_type': 'standard', |
|
|
'analyze_only': analyze_only, |
|
|
'original_length': len(content), |
|
|
'original_word_count': len(content.split()) |
|
|
}) |
|
|
|
|
|
return parsed_result |
|
|
|
|
|
except Exception as e: |
|
|
return {'error': f"Standard optimization failed: {str(e)}"} |
|
|
|
|
|
def _seo_style_optimization(self, content: str, analyze_only: bool) -> Dict[str, Any]: |
|
|
"""SEO-focused optimization for AI search engines""" |
|
|
try: |
|
|
prompt_template = ChatPromptTemplate.from_messages([ |
|
|
("system", self.seo_style_prompt), |
|
|
("user", f"Optimize this content for AI search engines:\n\n{content[:6000]}") |
|
|
]) |
|
|
|
|
|
chain = prompt_template | self.llm |
|
|
result = chain.invoke({}) |
|
|
|
|
|
result_content = result.content if hasattr(result, 'content') else str(result) |
|
|
parsed_result = self._parse_optimization_result(result_content) |
|
|
|
|
|
|
|
|
parsed_result.update({ |
|
|
'optimization_type': 'seo', |
|
|
'analyze_only': analyze_only, |
|
|
'seo_focused': True |
|
|
}) |
|
|
|
|
|
return parsed_result |
|
|
|
|
|
except Exception as e: |
|
|
return {'error': f"SEO optimization failed: {str(e)}"} |
|
|
|
|
|
def _competitive_optimization(self, content: str) -> Dict[str, Any]: |
|
|
"""Competitive analysis-based optimization""" |
|
|
try: |
|
|
formatted_prompt = self.competitive_analysis_prompt.format(content=content[:5000]) |
|
|
|
|
|
prompt_template = ChatPromptTemplate.from_messages([ |
|
|
("system", formatted_prompt), |
|
|
("user", "Perform the competitive analysis and provide optimization recommendations.") |
|
|
]) |
|
|
|
|
|
chain = prompt_template | self.llm |
|
|
result = chain.invoke({}) |
|
|
|
|
|
result_content = result.content if hasattr(result, 'content') else str(result) |
|
|
parsed_result = self._parse_optimization_result(result_content) |
|
|
|
|
|
parsed_result.update({ |
|
|
'optimization_type': 'competitive', |
|
|
'competitive_analysis': True |
|
|
}) |
|
|
|
|
|
return parsed_result |
|
|
|
|
|
except Exception as e: |
|
|
return {'error': f"Competitive optimization failed: {str(e)}"} |
|
|
|
|
|
def batch_optimize_content(self, content_list: List[str], optimization_type: str = "standard") -> List[Dict[str, Any]]: |
|
|
""" |
|
|
Optimize multiple pieces of content in batch |
|
|
|
|
|
Args: |
|
|
content_list (List[str]): List of content pieces to optimize |
|
|
optimization_type (str): Type of optimization to apply |
|
|
|
|
|
Returns: |
|
|
List[Dict]: List of optimization results |
|
|
""" |
|
|
results = [] |
|
|
|
|
|
for i, content in enumerate(content_list): |
|
|
try: |
|
|
result = self.optimize_content( |
|
|
content, |
|
|
optimization_type=optimization_type |
|
|
) |
|
|
result['batch_index'] = i |
|
|
results.append(result) |
|
|
|
|
|
except Exception as e: |
|
|
results.append({ |
|
|
'batch_index': i, |
|
|
'error': f"Batch optimization failed: {str(e)}" |
|
|
}) |
|
|
|
|
|
return results |
|
|
|
|
|
def generate_content_variations(self, content: str, num_variations: int = 3) -> List[Dict[str, Any]]: |
|
|
""" |
|
|
Generate multiple optimized variations of the same content |
|
|
|
|
|
Args: |
|
|
content (str): Original content |
|
|
num_variations (int): Number of variations to generate |
|
|
|
|
|
Returns: |
|
|
List[Dict]: List of content variations with analysis |
|
|
""" |
|
|
variations = [] |
|
|
|
|
|
variation_prompts = [ |
|
|
"Create a more conversational version optimized for AI chat responses", |
|
|
"Create a more authoritative version optimized for citations", |
|
|
"Create a more structured version optimized for question-answering" |
|
|
] |
|
|
|
|
|
for i in range(min(num_variations, len(variation_prompts))): |
|
|
try: |
|
|
custom_prompt = f"""You are optimizing content for AI systems. {variation_prompts[i]}. |
|
|
|
|
|
Original content: {content[:4000]} |
|
|
|
|
|
Provide the optimized variation in JSON format: |
|
|
```json |
|
|
{{ |
|
|
"variation_type": "conversational/authoritative/structured", |
|
|
"optimized_content": "the rewritten content...", |
|
|
"key_changes": ["change 1", "change 2"], |
|
|
"target_use_case": "description of ideal use case" |
|
|
}} |
|
|
```""" |
|
|
|
|
|
prompt_template = ChatPromptTemplate.from_messages([ |
|
|
("system", custom_prompt), |
|
|
("user", "Generate the variation.") |
|
|
]) |
|
|
|
|
|
chain = prompt_template | self.llm |
|
|
result = chain.invoke({}) |
|
|
|
|
|
result_content = result.content if hasattr(result, 'content') else str(result) |
|
|
parsed_result = self._parse_optimization_result(result_content) |
|
|
|
|
|
parsed_result.update({ |
|
|
'variation_index': i, |
|
|
'variation_prompt': variation_prompts[i] |
|
|
}) |
|
|
|
|
|
variations.append(parsed_result) |
|
|
|
|
|
except Exception as e: |
|
|
variations.append({ |
|
|
'variation_index': i, |
|
|
'error': f"Variation generation failed: {str(e)}" |
|
|
}) |
|
|
|
|
|
return variations |
|
|
|
|
|
def analyze_content_readability(self, content: str) -> Dict[str, Any]: |
|
|
""" |
|
|
Analyze content readability for AI systems |
|
|
|
|
|
Args: |
|
|
content (str): Content to analyze |
|
|
|
|
|
Returns: |
|
|
Dict: Readability analysis results |
|
|
""" |
|
|
try: |
|
|
|
|
|
words = content.split() |
|
|
sentences = re.split(r'[.!?]+', content) |
|
|
sentences = [s.strip() for s in sentences if s.strip()] |
|
|
|
|
|
paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()] |
|
|
|
|
|
|
|
|
avg_words_per_sentence = len(words) / len(sentences) if sentences else 0 |
|
|
avg_sentences_per_paragraph = len(sentences) / len(paragraphs) if paragraphs else 0 |
|
|
|
|
|
|
|
|
avg_word_length = sum(len(word) for word in words) / len(words) if words else 0 |
|
|
|
|
|
|
|
|
long_sentences = [s for s in sentences if len(s.split()) > 20] |
|
|
complex_words = [w for w in words if len(w) > 6] |
|
|
|
|
|
return { |
|
|
'basic_metrics': { |
|
|
'total_words': len(words), |
|
|
'total_sentences': len(sentences), |
|
|
'total_paragraphs': len(paragraphs), |
|
|
'avg_words_per_sentence': avg_words_per_sentence, |
|
|
'avg_sentences_per_paragraph': avg_sentences_per_paragraph, |
|
|
'avg_word_length': avg_word_length |
|
|
}, |
|
|
'complexity_indicators': { |
|
|
'long_sentences_count': len(long_sentences), |
|
|
'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0, |
|
|
'complex_words_count': len(complex_words), |
|
|
'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0 |
|
|
}, |
|
|
'ai_readability_score': self._calculate_ai_readability_score({ |
|
|
'avg_words_per_sentence': avg_words_per_sentence, |
|
|
'avg_word_length': avg_word_length, |
|
|
'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0 |
|
|
}), |
|
|
'recommendations': self._generate_readability_recommendations({ |
|
|
'avg_words_per_sentence': avg_words_per_sentence, |
|
|
'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0, |
|
|
'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0 |
|
|
}) |
|
|
} |
|
|
|
|
|
except Exception as e: |
|
|
return {'error': f"Readability analysis failed: {str(e)}"} |
|
|
|
|
|
def extract_key_entities(self, content: str) -> Dict[str, Any]: |
|
|
""" |
|
|
Extract key entities and topics for optimization |
|
|
|
|
|
Args: |
|
|
content (str): Content to analyze |
|
|
|
|
|
Returns: |
|
|
Dict: Extracted entities and topics |
|
|
""" |
|
|
try: |
|
|
entity_prompt = """Extract key entities, topics, and concepts from this content for AI optimization. |
|
|
|
|
|
Content: {content} |
|
|
|
|
|
Identify: |
|
|
1. Named entities (people, places, organizations) |
|
|
2. Key concepts and topics |
|
|
3. Technical terms and jargon |
|
|
4. Potential semantic keywords |
|
|
5. Question-answer opportunities |
|
|
|
|
|
Format as JSON: |
|
|
```json |
|
|
{{ |
|
|
"named_entities": ["entity1", "entity2"], |
|
|
"key_topics": ["topic1", "topic2"], |
|
|
"technical_terms": ["term1", "term2"], |
|
|
"semantic_keywords": ["keyword1", "keyword2"], |
|
|
"question_opportunities": ["What is...", "How does..."], |
|
|
"entity_relationships": ["relationship descriptions"] |
|
|
}} |
|
|
```""" |
|
|
|
|
|
prompt_template = ChatPromptTemplate.from_messages([ |
|
|
("system", entity_prompt.format(content=content[:5000])), |
|
|
("user", "Extract the entities and topics.") |
|
|
]) |
|
|
|
|
|
chain = prompt_template | self.llm |
|
|
result = chain.invoke({}) |
|
|
|
|
|
result_content = result.content if hasattr(result, 'content') else str(result) |
|
|
return self._parse_optimization_result(result_content) |
|
|
|
|
|
except Exception as e: |
|
|
return {'error': f"Entity extraction failed: {str(e)}"} |
|
|
|
|
|
def optimize_for_voice_search(self, content: str) -> Dict[str, Any]: |
|
|
""" |
|
|
Optimize content specifically for voice search and conversational AI |
|
|
|
|
|
Args: |
|
|
content (str): Content to optimize |
|
|
|
|
|
Returns: |
|
|
Dict: Voice search optimization results |
|
|
""" |
|
|
try: |
|
|
voice_prompt = """Optimize this content for voice search and conversational AI systems. |
|
|
|
|
|
Focus on: |
|
|
1. Natural language patterns |
|
|
2. Question-based structure |
|
|
3. Conversational tone |
|
|
4. Clear, direct answers |
|
|
5. Featured snippet optimization |
|
|
|
|
|
Original content: {content} |
|
|
|
|
|
Provide optimization in JSON: |
|
|
```json |
|
|
{{ |
|
|
"voice_optimized_content": "conversational version...", |
|
|
"question_answer_pairs": [ |
|
|
{{"question": "What is...", "answer": "Direct answer..."}}, |
|
|
{{"question": "How does...", "answer": "Step by step..."}} |
|
|
], |
|
|
"featured_snippet_candidates": ["snippet 1", "snippet 2"], |
|
|
"natural_language_improvements": ["improvement 1", "improvement 2"], |
|
|
"conversational_score": 8.5 |
|
|
}} |
|
|
```""" |
|
|
|
|
|
prompt_template = ChatPromptTemplate.from_messages([ |
|
|
("system", voice_prompt.format(content=content[:4000])), |
|
|
("user", "Optimize for voice search.") |
|
|
]) |
|
|
|
|
|
chain = prompt_template | self.llm |
|
|
result = chain.invoke({}) |
|
|
|
|
|
result_content = result.content if hasattr(result, 'content') else str(result) |
|
|
parsed_result = self._parse_optimization_result(result_content) |
|
|
|
|
|
parsed_result.update({ |
|
|
'optimization_type': 'voice_search', |
|
|
'voice_optimized': True |
|
|
}) |
|
|
|
|
|
return parsed_result |
|
|
|
|
|
except Exception as e: |
|
|
return {'error': f"Voice search optimization failed: {str(e)}"} |
|
|
|
|
|
def _parse_optimization_result(self, response_text: str) -> Dict[str, Any]: |
|
|
"""Parse LLM response and extract structured results""" |
|
|
try: |
|
|
|
|
|
json_start = response_text.find('{') |
|
|
json_end = response_text.rfind('}') + 1 |
|
|
|
|
|
if json_start != -1 and json_end != -1: |
|
|
json_str = response_text[json_start:json_end] |
|
|
parsed = json.loads(json_str) |
|
|
|
|
|
|
|
|
if 'scores' not in parsed and 'score' in parsed: |
|
|
parsed['scores'] = parsed['score'] |
|
|
|
|
|
return parsed |
|
|
else: |
|
|
|
|
|
return { |
|
|
'raw_response': response_text, |
|
|
'parsing_error': 'No JSON structure found in response', |
|
|
'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0} |
|
|
} |
|
|
|
|
|
except json.JSONDecodeError as e: |
|
|
return { |
|
|
'raw_response': response_text, |
|
|
'parsing_error': f'JSON decode error: {str(e)}', |
|
|
'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0} |
|
|
} |
|
|
except Exception as e: |
|
|
return { |
|
|
'raw_response': response_text, |
|
|
'parsing_error': f'Unexpected parsing error: {str(e)}', |
|
|
'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0} |
|
|
} |
|
|
|
|
|
def _calculate_ai_readability_score(self, metrics: Dict[str, float]) -> float: |
|
|
"""Calculate AI-specific readability score""" |
|
|
try: |
|
|
|
|
|
optimal_words_per_sentence = 15 |
|
|
optimal_word_length = 5 |
|
|
optimal_complex_words_percentage = 15 |
|
|
|
|
|
|
|
|
sentence_score = max(0, 10 - abs(metrics['avg_words_per_sentence'] - optimal_words_per_sentence) * 0.5) |
|
|
word_length_score = max(0, 10 - abs(metrics['avg_word_length'] - optimal_word_length) * 2) |
|
|
complexity_score = max(0, 10 - abs(metrics['complex_words_percentage'] - optimal_complex_words_percentage) * 0.3) |
|
|
|
|
|
|
|
|
overall_score = (sentence_score * 0.4 + word_length_score * 0.3 + complexity_score * 0.3) |
|
|
|
|
|
return round(overall_score, 1) |
|
|
|
|
|
except Exception: |
|
|
return 5.0 |
|
|
|
|
|
def _generate_readability_recommendations(self, metrics: Dict[str, float]) -> List[str]: |
|
|
"""Generate specific readability improvement recommendations""" |
|
|
recommendations = [] |
|
|
|
|
|
try: |
|
|
if metrics['avg_words_per_sentence'] > 20: |
|
|
recommendations.append("Break down long sentences for better AI processing") |
|
|
elif metrics['avg_words_per_sentence'] < 8: |
|
|
recommendations.append("Consider combining very short sentences for better context") |
|
|
|
|
|
if metrics['long_sentences_percentage'] > 30: |
|
|
recommendations.append("Reduce the number of complex sentences (>20 words)") |
|
|
|
|
|
if metrics['complex_words_percentage'] > 25: |
|
|
recommendations.append("Simplify vocabulary where possible for broader accessibility") |
|
|
elif metrics['complex_words_percentage'] < 5: |
|
|
recommendations.append("Add more specific terminology to establish authority") |
|
|
|
|
|
return recommendations |
|
|
|
|
|
except Exception: |
|
|
return ["Unable to generate specific recommendations"] |