MBilal-72 commited on
Commit
4a25546
·
verified ·
1 Parent(s): 82f1d4e

Update utils/optimizer.py

Browse files
Files changed (1) hide show
  1. utils/optimizer.py +547 -557
utils/optimizer.py CHANGED
@@ -1,558 +1,548 @@
1
- """
2
- Content Optimization Module
3
- Enhances content for better AI/LLM performance and GEO scores
4
- """
5
-
6
- import json
7
- import re
8
- from typing import Dict, Any, List, Optional
9
- from langchain.prompts import ChatPromptTemplate
10
-
11
-
12
- class ContentOptimizer:
13
- """Main class for optimizing content for AI search engines"""
14
-
15
    def __init__(self, llm):
        """Create an optimizer bound to a chat model.

        Args:
            llm: a LangChain-compatible chat model; it is composed with
                prompt templates via the ``prompt | llm`` runnable syntax.
        """
        self.llm = llm
        # Build the three system prompts once, up front.
        self.setup_prompts()
18
-
19
- def setup_prompts(self):
20
- """Initialize optimization prompts"""
21
-
22
- # Main content enhancement prompt
23
- self.enhancement_prompt = """You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.
24
-
25
- Evaluate the input text based on the following criteria, assigning a score from 1–10 for each:
26
-
27
- Clarity: How easily can the content be understood?
28
- Structuredness: How well-organized and coherent is the content?
29
- LLM Answerability: How easily can an LLM extract precise answers from the content?
30
-
31
- Identify the most salient keywords.
32
-
33
- Rewrite the text to improve:
34
- - Clarity and precision
35
- - Logical structure and flow
36
- - Suitability for LLM-based information retrieval
37
-
38
- Present your analysis and optimized text in the following JSON format:
39
-
40
- ```json
41
- {
42
- "scores": {
43
- "clarity": 8.5,
44
- "structuredness": 7.0,
45
- "answerability": 9.0
46
- },
47
- "keywords": ["example", "installation", "setup"],
48
- "optimized_text": "..."
49
- }
50
- ```"""
51
-
52
- # SEO-style optimization prompt
53
- self.seo_style_prompt = """You are an AI-first SEO specialist. Optimize this content for AI search engines and LLM systems.
54
-
55
- Focus on:
56
- 1. Semantic keyword optimization
57
- 2. Question-answer format enhancement
58
- 3. Factual accuracy and authority signals
59
- 4. Conversational readiness
60
- 5. Citation-worthy structure
61
-
62
- Provide analysis and optimization in JSON:
63
-
64
- ```json
65
- {
66
- "seo_analysis": {
67
- "keyword_density": "analysis of current keywords",
68
- "semantic_gaps": ["missing semantic terms"],
69
- "readability_score": 8.5,
70
- "authority_signals": ["credentials", "citations"]
71
- },
72
- "optimized_content": {
73
- "title_suggestions": ["optimized title 1", "optimized title 2"],
74
- "meta_description": "AI-optimized meta description",
75
- "enhanced_content": "full optimized content...",
76
- "structured_data_suggestions": ["schema markup recommendations"]
77
- },
78
- "improvement_summary": {
79
- "changes_made": ["change 1", "change 2"],
80
- "expected_impact": "description of expected improvements"
81
- }
82
- }
83
- ```"""
84
-
85
- # Competitive content analysis prompt
86
- self.competitive_analysis_prompt = """Compare this content against best practices for AI search optimization. Identify gaps and opportunities.
87
-
88
- Original Content: {content}
89
-
90
- Analyze against these AI search factors:
91
- - Entity recognition and linking
92
- - Question coverage completeness
93
- - Factual statement clarity
94
- - Conversational flow
95
- - Semantic relationship mapping
96
-
97
- Provide competitive analysis in JSON format with specific recommendations."""
98
-
99
- def optimize_content(self, content: str, analyze_only: bool = False,
100
- include_keywords: bool = True, optimization_type: str = "standard") -> Dict[str, Any]:
101
- """
102
- Main content optimization function
103
-
104
- Args:
105
- content (str): Content to optimize
106
- analyze_only (bool): If True, only analyze without rewriting
107
- include_keywords (bool): Whether to include keyword analysis
108
- optimization_type (str): Type of optimization ("standard", "seo", "competitive")
109
-
110
- Returns:
111
- Dict: Optimization results with scores and enhanced content
112
- """
113
- try:
114
- # Choose optimization approach
115
- if optimization_type == "seo":
116
- return self._seo_style_optimization(content, analyze_only)
117
- elif optimization_type == "competitive":
118
- return self._competitive_optimization(content)
119
- else:
120
- return self._standard_optimization(content, analyze_only, include_keywords)
121
-
122
- except Exception as e:
123
- return {'error': f"Optimization failed: {str(e)}"}
124
-
125
- def _standard_optimization(self, content: str, analyze_only: bool, include_keywords: bool) -> Dict[str, Any]:
126
- """Standard content optimization using enhancement prompt"""
127
- try:
128
- # Modify prompt based on options
129
- prompt_text = self.enhancement_prompt
130
-
131
- if analyze_only:
132
- prompt_text = prompt_text.replace(
133
- "Rewrite the text to improve:",
134
- "Analyze the text for potential improvements in:"
135
- ).replace(
136
- '"optimized_text": "..."',
137
- '"optimization_suggestions": ["suggestion 1", "suggestion 2"]'
138
- )
139
-
140
- if not include_keywords:
141
- prompt_text = prompt_text.replace(
142
- '"keywords": ["example", "installation", "setup"],',
143
- ''
144
- )
145
-
146
- # Create and run chain
147
- prompt_template = ChatPromptTemplate.from_messages([
148
- ("system", prompt_text),
149
- ("user", content[:6000]) # Limit content length
150
- ])
151
-
152
- chain = prompt_template | self.llm
153
- result = chain.invoke({})
154
-
155
- # Parse result
156
- result_content = result.content if hasattr(result, 'content') else str(result)
157
- parsed_result = self._parse_optimization_result(result_content)
158
-
159
- # Add metadata
160
- parsed_result.update({
161
- 'optimization_type': 'standard',
162
- 'analyze_only': analyze_only,
163
- 'original_length': len(content),
164
- 'original_word_count': len(content.split())
165
- })
166
-
167
- return parsed_result
168
-
169
- except Exception as e:
170
- return {'error': f"Standard optimization failed: {str(e)}"}
171
-
172
- def _seo_style_optimization(self, content: str, analyze_only: bool) -> Dict[str, Any]:
173
- """SEO-focused optimization for AI search engines"""
174
- try:
175
- prompt_template = ChatPromptTemplate.from_messages([
176
- ("system", self.seo_style_prompt),
177
- ("user", f"Optimize this content for AI search engines:\n\n{content[:6000]}")
178
- ])
179
-
180
- chain = prompt_template | self.llm
181
- result = chain.invoke({})
182
-
183
- result_content = result.content if hasattr(result, 'content') else str(result)
184
- parsed_result = self._parse_optimization_result(result_content)
185
-
186
- # Add SEO-specific metadata
187
- parsed_result.update({
188
- 'optimization_type': 'seo',
189
- 'analyze_only': analyze_only,
190
- 'seo_focused': True
191
- })
192
-
193
- return parsed_result
194
-
195
- except Exception as e:
196
- return {'error': f"SEO optimization failed: {str(e)}"}
197
-
198
- def _competitive_optimization(self, content: str) -> Dict[str, Any]:
199
- """Competitive analysis-based optimization"""
200
- try:
201
- formatted_prompt = self.competitive_analysis_prompt.format(content=content[:5000])
202
-
203
- prompt_template = ChatPromptTemplate.from_messages([
204
- ("system", formatted_prompt),
205
- ("user", "Perform the competitive analysis and provide optimization recommendations.")
206
- ])
207
-
208
- chain = prompt_template | self.llm
209
- result = chain.invoke({})
210
-
211
- result_content = result.content if hasattr(result, 'content') else str(result)
212
- parsed_result = self._parse_optimization_result(result_content)
213
-
214
- parsed_result.update({
215
- 'optimization_type': 'competitive',
216
- 'competitive_analysis': True
217
- })
218
-
219
- return parsed_result
220
-
221
- except Exception as e:
222
- return {'error': f"Competitive optimization failed: {str(e)}"}
223
-
224
- def batch_optimize_content(self, content_list: List[str], optimization_type: str = "standard") -> List[Dict[str, Any]]:
225
- """
226
- Optimize multiple pieces of content in batch
227
-
228
- Args:
229
- content_list (List[str]): List of content pieces to optimize
230
- optimization_type (str): Type of optimization to apply
231
-
232
- Returns:
233
- List[Dict]: List of optimization results
234
- """
235
- results = []
236
-
237
- for i, content in enumerate(content_list):
238
- try:
239
- result = self.optimize_content(
240
- content,
241
- optimization_type=optimization_type
242
- )
243
- result['batch_index'] = i
244
- results.append(result)
245
-
246
- except Exception as e:
247
- results.append({
248
- 'batch_index': i,
249
- 'error': f"Batch optimization failed: {str(e)}"
250
- })
251
-
252
- return results
253
-
254
    def generate_content_variations(self, content: str, num_variations: int = 3) -> List[Dict[str, Any]]:
        """
        Generate multiple optimized variations of the same content

        Args:
            content (str): Original content
            num_variations (int): Number of variations to generate (capped at
                the three built-in variation styles below)

        Returns:
            List[Dict]: List of content variations with analysis; a failed
            variation contributes an ``{'variation_index': i, 'error': ...}``
            entry instead of aborting the loop
        """
        variations = []

        # One prompt per variation style; the loop below is capped by this list.
        variation_prompts = [
            "Create a more conversational version optimized for AI chat responses",
            "Create a more authoritative version optimized for citations",
            "Create a more structured version optimized for question-answering"
        ]

        for i in range(min(num_variations, len(variation_prompts))):
            try:
                # {{ }} in this f-string produce literal braces for the JSON
                # example shown to the model.
                # NOTE(review): the f-string embeds user content directly into
                # the system template; braces inside `content` may be parsed
                # as template variables by ChatPromptTemplate — confirm.
                custom_prompt = f"""You are optimizing content for AI systems. {variation_prompts[i]}.

Original content: {content[:4000]}

Provide the optimized variation in JSON format:
```json
{{
    "variation_type": "conversational/authoritative/structured",
    "optimized_content": "the rewritten content...",
    "key_changes": ["change 1", "change 2"],
    "target_use_case": "description of ideal use case"
}}
```"""

                prompt_template = ChatPromptTemplate.from_messages([
                    ("system", custom_prompt),
                    ("user", "Generate the variation.")
                ])

                chain = prompt_template | self.llm
                result = chain.invoke({})

                # Chat models return an object with .content; fall back to str().
                result_content = result.content if hasattr(result, 'content') else str(result)
                parsed_result = self._parse_optimization_result(result_content)

                parsed_result.update({
                    'variation_index': i,
                    'variation_prompt': variation_prompts[i]
                })

                variations.append(parsed_result)

            except Exception as e:
                variations.append({
                    'variation_index': i,
                    'error': f"Variation generation failed: {str(e)}"
                })

        return variations
314
-
315
- def analyze_content_readability(self, content: str) -> Dict[str, Any]:
316
- """
317
- Analyze content readability for AI systems
318
-
319
- Args:
320
- content (str): Content to analyze
321
-
322
- Returns:
323
- Dict: Readability analysis results
324
- """
325
- try:
326
- # Basic readability metrics
327
- words = content.split()
328
- sentences = re.split(r'[.!?]+', content)
329
- sentences = [s.strip() for s in sentences if s.strip()]
330
-
331
- paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]
332
-
333
- # Calculate metrics
334
- avg_words_per_sentence = len(words) / len(sentences) if sentences else 0
335
- avg_sentences_per_paragraph = len(sentences) / len(paragraphs) if paragraphs else 0
336
-
337
- # Character-based metrics
338
- avg_word_length = sum(len(word) for word in words) / len(words) if words else 0
339
-
340
- # Complexity indicators
341
- long_sentences = [s for s in sentences if len(s.split()) > 20]
342
- complex_words = [w for w in words if len(w) > 6]
343
-
344
- return {
345
- 'basic_metrics': {
346
- 'total_words': len(words),
347
- 'total_sentences': len(sentences),
348
- 'total_paragraphs': len(paragraphs),
349
- 'avg_words_per_sentence': avg_words_per_sentence,
350
- 'avg_sentences_per_paragraph': avg_sentences_per_paragraph,
351
- 'avg_word_length': avg_word_length
352
- },
353
- 'complexity_indicators': {
354
- 'long_sentences_count': len(long_sentences),
355
- 'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
356
- 'complex_words_count': len(complex_words),
357
- 'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
358
- },
359
- 'ai_readability_score': self._calculate_ai_readability_score({
360
- 'avg_words_per_sentence': avg_words_per_sentence,
361
- 'avg_word_length': avg_word_length,
362
- 'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
363
- }),
364
- 'recommendations': self._generate_readability_recommendations({
365
- 'avg_words_per_sentence': avg_words_per_sentence,
366
- 'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
367
- 'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
368
- })
369
- }
370
-
371
- except Exception as e:
372
- return {'error': f"Readability analysis failed: {str(e)}"}
373
-
374
- def extract_key_entities(self, content: str) -> Dict[str, Any]:
375
- """
376
- Extract key entities and topics for optimization
377
-
378
- Args:
379
- content (str): Content to analyze
380
-
381
- Returns:
382
- Dict: Extracted entities and topics
383
- """
384
- try:
385
- entity_prompt = """Extract key entities, topics, and concepts from this content for AI optimization.
386
-
387
- Content: {content}
388
-
389
- Identify:
390
- 1. Named entities (people, places, organizations)
391
- 2. Key concepts and topics
392
- 3. Technical terms and jargon
393
- 4. Potential semantic keywords
394
- 5. Question-answer opportunities
395
-
396
- Format as JSON:
397
- ```json
398
- {{
399
- "named_entities": ["entity1", "entity2"],
400
- "key_topics": ["topic1", "topic2"],
401
- "technical_terms": ["term1", "term2"],
402
- "semantic_keywords": ["keyword1", "keyword2"],
403
- "question_opportunities": ["What is...", "How does..."],
404
- "entity_relationships": ["relationship descriptions"]
405
- }}
406
- ```"""
407
-
408
- prompt_template = ChatPromptTemplate.from_messages([
409
- ("system", entity_prompt.format(content=content[:5000])),
410
- ("user", "Extract the entities and topics.")
411
- ])
412
-
413
- chain = prompt_template | self.llm
414
- result = chain.invoke({})
415
-
416
- result_content = result.content if hasattr(result, 'content') else str(result)
417
- return self._parse_optimization_result(result_content)
418
-
419
- except Exception as e:
420
- return {'error': f"Entity extraction failed: {str(e)}"}
421
-
422
- def optimize_for_voice_search(self, content: str) -> Dict[str, Any]:
423
- """
424
- Optimize content specifically for voice search and conversational AI
425
-
426
- Args:
427
- content (str): Content to optimize
428
-
429
- Returns:
430
- Dict: Voice search optimization results
431
- """
432
- try:
433
- voice_prompt = """Optimize this content for voice search and conversational AI systems.
434
-
435
- Focus on:
436
- 1. Natural language patterns
437
- 2. Question-based structure
438
- 3. Conversational tone
439
- 4. Clear, direct answers
440
- 5. Featured snippet optimization
441
-
442
- Original content: {content}
443
-
444
- Provide optimization in JSON:
445
- ```json
446
- {{
447
- "voice_optimized_content": "conversational version...",
448
- "question_answer_pairs": [
449
- {{"question": "What is...", "answer": "Direct answer..."}},
450
- {{"question": "How does...", "answer": "Step by step..."}}
451
- ],
452
- "featured_snippet_candidates": ["snippet 1", "snippet 2"],
453
- "natural_language_improvements": ["improvement 1", "improvement 2"],
454
- "conversational_score": 8.5
455
- }}
456
- ```"""
457
-
458
- prompt_template = ChatPromptTemplate.from_messages([
459
- ("system", voice_prompt.format(content=content[:4000])),
460
- ("user", "Optimize for voice search.")
461
- ])
462
-
463
- chain = prompt_template | self.llm
464
- result = chain.invoke({})
465
-
466
- result_content = result.content if hasattr(result, 'content') else str(result)
467
- parsed_result = self._parse_optimization_result(result_content)
468
-
469
- parsed_result.update({
470
- 'optimization_type': 'voice_search',
471
- 'voice_optimized': True
472
- })
473
-
474
- return parsed_result
475
-
476
- except Exception as e:
477
- return {'error': f"Voice search optimization failed: {str(e)}"}
478
-
479
- def _parse_optimization_result(self, response_text: str) -> Dict[str, Any]:
480
- """Parse LLM response and extract structured results"""
481
- try:
482
- # Find JSON content in the response
483
- json_start = response_text.find('{')
484
- json_end = response_text.rfind('}') + 1
485
-
486
- if json_start != -1 and json_end != -1:
487
- json_str = response_text[json_start:json_end]
488
- parsed = json.loads(json_str)
489
-
490
- # Ensure consistent structure
491
- if 'scores' not in parsed and 'score' in parsed:
492
- parsed['scores'] = parsed['score']
493
-
494
- return parsed
495
- else:
496
- # If no JSON found, return raw response with error flag
497
- return {
498
- 'raw_response': response_text,
499
- 'parsing_error': 'No JSON structure found in response',
500
- 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
501
- }
502
-
503
- except json.JSONDecodeError as e:
504
- return {
505
- 'raw_response': response_text,
506
- 'parsing_error': f'JSON decode error: {str(e)}',
507
- 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
508
- }
509
- except Exception as e:
510
- return {
511
- 'raw_response': response_text,
512
- 'parsing_error': f'Unexpected parsing error: {str(e)}',
513
- 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
514
- }
515
-
516
- def _calculate_ai_readability_score(self, metrics: Dict[str, float]) -> float:
517
- """Calculate AI-specific readability score"""
518
- try:
519
- # Optimal ranges for AI consumption
520
- optimal_words_per_sentence = 15 # Sweet spot for AI processing
521
- optimal_word_length = 5 # Balance of complexity and clarity
522
- optimal_complex_words_percentage = 15 # Some complexity is good for authority
523
-
524
- # Calculate deviations from optimal
525
- sentence_score = max(0, 10 - abs(metrics['avg_words_per_sentence'] - optimal_words_per_sentence) * 0.5)
526
- word_length_score = max(0, 10 - abs(metrics['avg_word_length'] - optimal_word_length) * 2)
527
- complexity_score = max(0, 10 - abs(metrics['complex_words_percentage'] - optimal_complex_words_percentage) * 0.3)
528
-
529
- # Weighted average
530
- overall_score = (sentence_score * 0.4 + word_length_score * 0.3 + complexity_score * 0.3)
531
-
532
- return round(overall_score, 1)
533
-
534
- except Exception:
535
- return 5.0 # Default neutral score
536
-
537
- def _generate_readability_recommendations(self, metrics: Dict[str, float]) -> List[str]:
538
- """Generate specific readability improvement recommendations"""
539
- recommendations = []
540
-
541
- try:
542
- if metrics['avg_words_per_sentence'] > 20:
543
- recommendations.append("Break down long sentences for better AI processing")
544
- elif metrics['avg_words_per_sentence'] < 8:
545
- recommendations.append("Consider combining very short sentences for better context")
546
-
547
- if metrics['long_sentences_percentage'] > 30:
548
- recommendations.append("Reduce the number of complex sentences (>20 words)")
549
-
550
- if metrics['complex_words_percentage'] > 25:
551
- recommendations.append("Simplify vocabulary where possible for broader accessibility")
552
- elif metrics['complex_words_percentage'] < 5:
553
- recommendations.append("Add more specific terminology to establish authority")
554
-
555
- return recommendations
556
-
557
- except Exception:
558
  return ["Unable to generate specific recommendations"]
 
1
+ """
2
+ Content Optimization Module
3
+ Enhances content for better AI/LLM performance and GEO scores
4
+ """
5
+
6
+ import json
7
+ import re
8
+ from typing import Dict, Any, List, Optional
9
+ from langchain.prompts import ChatPromptTemplate
10
+
11
+
12
+ class ContentOptimizer:
13
+ """Main class for optimizing content for AI search engines"""
14
+
15
    def __init__(self, llm):
        """Create an optimizer bound to a chat model.

        Args:
            llm: a LangChain-compatible chat model; it is composed with
                prompt templates via the ``prompt | llm`` runnable syntax.
        """
        self.llm = llm
        # Build the three system prompts once, up front.
        self.setup_prompts()
18
+
19
+ def setup_prompts(self):
20
+ """Initialize optimization prompts"""
21
+
22
+ # Main content enhancement prompt
23
+ self.enhancement_prompt = ("You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems."
24
+ "Evaluate the input text based on the following criteria, assigning a score from 1-10 for each:"
25
+ "Clarity: How easily can the content be understood?"
26
+ "Structuredness: How well-organized and coherent is the content?"
27
+ "LLM Answerability: How easily can an LLM extract precise answers from the content?"
28
+ "Identify the most salient keywords."
29
+ "Rewrite the text to improve:"
30
+ "- Clarity and precision"
31
+ "- Logical structure and flow"
32
+ "- Suitability for LLM-based information retrieval"
33
+ "Present your analysis and optimized text in the following JSON format:"
34
+ "```json"
35
+ "{"
36
+ "scores: {"
37
+ "clarity: 8.5,"
38
+ "structuredness: 7.0,"
39
+ "answerability: 9.0"
40
+ "},"
41
+ "keywords: [example, installation, setup],"
42
+ "optimized_text: ..."
43
+ "}"
44
+ "```"
45
+ )
46
+
47
+ # SEO-style optimization prompt
48
+ self.seo_style_prompt = ("You are an AI-first SEO specialist. Optimize this content for AI search engines and LLM systems."
49
+ "Focus on:"
50
+ "1. Semantic keyword optimization"
51
+ "2. Question-answer format enhancement"
52
+ "3. Factual accuracy and authority signals"
53
+ "4. Conversational readiness"
54
+ "5. Citation-worthy structure"
55
+ " Provide analysis and optimization in JSON:"
56
+ "```json"
57
+ "{"
58
+ "seo_analysis: {"
59
+ "keyword_density: analysis of current keywords,"
60
+ "semantic_gaps: [missing semantic terms],"
61
+ "readability_score: 8.5,"
62
+ "authority_signals : [credentials, citations]"
63
+ "},"
64
+ "optimized_content: {"
65
+ "title_suggestions: [optimized title 1, optimized title 2],"
66
+ "meta_description: AI-optimized meta description,"
67
+ "enhanced_content: full optimized content...,"
68
+ "structured_data_suggestions: [schema markup recommendations]"
69
+ "},"
70
+ "improvement_summary: {"
71
+ "changes_made: [change 1, change 2],"
72
+ "expected_impact: description of expected improvements"
73
+ "}"
74
+ "}"
75
+ "```"
76
+ )
77
+
78
+ # Competitive content analysis prompt
79
+ self.competitive_analysis_prompt =( "Compare this content against best practices for AI search optimization. Identify gaps and opportunities."
80
+ "Original Content: {content}"
81
+ "Analyze against these AI search factors:"
82
+ "- Entity recognition and linking"
83
+ "- Question coverage completeness"
84
+ "- Factual statement clarity"
85
+ "- Conversational flow"
86
+ "- Semantic relationship mapping"
87
+
88
+ "Provide competitive analysis in JSON format with specific recommendations."
89
+ )
90
+
91
    def optimize_content(self, content: str, analyze_only: bool = False,
                         include_keywords: bool = True, optimization_type: str = "standard") -> Dict[str, Any]:
        """
        Main content optimization function: routes to the requested flow.

        Args:
            content (str): Content to optimize
            analyze_only (bool): If True, only analyze without rewriting
            include_keywords (bool): Whether to include keyword analysis
            optimization_type (str): Type of optimization ("standard", "seo",
                "competitive"); unknown values fall back to "standard"

        Returns:
            Dict: Optimization results with scores and enhanced content, or
            an ``{'error': ...}`` mapping on failure
        """
        try:
            # Choose optimization approach
            if optimization_type == "seo":
                return self._seo_style_optimization(content, analyze_only)
            elif optimization_type == "competitive":
                return self._competitive_optimization(content)
            else:
                return self._standard_optimization(content, analyze_only, include_keywords)

        except Exception as e:
            return {'error': f"Optimization failed: {str(e)}"}
114
+
115
+ def _standard_optimization(self, content: str, analyze_only: bool, include_keywords: bool) -> Dict[str, Any]:
116
+ """Standard content optimization using enhancement prompt"""
117
+ try:
118
+ # Modify prompt based on options
119
+ prompt_text = self.enhancement_prompt
120
+
121
+ if analyze_only:
122
+ prompt_text = prompt_text.replace(
123
+ "Rewrite the text to improve:",
124
+ "Analyze the text for potential improvements in:"
125
+ ).replace(
126
+ '"optimized_text": "..."',
127
+ '"optimization_suggestions": ["suggestion 1", "suggestion 2"]'
128
+ )
129
+
130
+ if not include_keywords:
131
+ prompt_text = prompt_text.replace(
132
+ '"keywords": ["example", "installation", "setup"],',
133
+ ''
134
+ )
135
+
136
+ # Create and run chain
137
+ prompt_template = ChatPromptTemplate.from_messages([
138
+ ("system", prompt_text),
139
+ ("user", content[:6000]) # Limit content length
140
+ ])
141
+
142
+ chain = prompt_template | self.llm
143
+ result = chain.invoke({})
144
+
145
+ # Parse result
146
+ result_content = result.content if hasattr(result, 'content') else str(result)
147
+ parsed_result = self._parse_optimization_result(result_content)
148
+
149
+ # Add metadata
150
+ parsed_result.update({
151
+ 'optimization_type': 'standard',
152
+ 'analyze_only': analyze_only,
153
+ 'original_length': len(content),
154
+ 'original_word_count': len(content.split())
155
+ })
156
+
157
+ return parsed_result
158
+
159
+ except Exception as e:
160
+ return {'error': f"Standard optimization failed: {str(e)}"}
161
+
162
    def _seo_style_optimization(self, content: str, analyze_only: bool) -> Dict[str, Any]:
        """SEO-focused optimization for AI search engines.

        Args:
            content: Document text (truncated to 6000 characters).
            analyze_only: Recorded in the result metadata; the SEO prompt
                itself is not altered by this flag.

        Returns:
            Parsed optimization result plus metadata, or ``{'error': ...}``.
        """
        try:
            # NOTE(review): the f-string embeds user content directly into
            # the message; braces inside `content` may be parsed as template
            # placeholders by ChatPromptTemplate — confirm.
            prompt_template = ChatPromptTemplate.from_messages([
                ("system", self.seo_style_prompt),
                ("user", f"Optimize this content for AI search engines:\n\n{content[:6000]}")
            ])

            chain = prompt_template | self.llm
            result = chain.invoke({})

            # Chat models return an object with .content; fall back to str().
            result_content = result.content if hasattr(result, 'content') else str(result)
            parsed_result = self._parse_optimization_result(result_content)

            # Add SEO-specific metadata
            parsed_result.update({
                'optimization_type': 'seo',
                'analyze_only': analyze_only,
                'seo_focused': True
            })

            return parsed_result

        except Exception as e:
            return {'error': f"SEO optimization failed: {str(e)}"}
187
+
188
    def _competitive_optimization(self, content: str) -> Dict[str, Any]:
        """Competitive analysis-based optimization.

        Args:
            content: Document text (truncated to 5000 characters).

        Returns:
            Parsed analysis result plus metadata, or ``{'error': ...}``.
        """
        try:
            # NOTE(review): pre-formatting injects user text into the system
            # template; braces in `content` may then be re-parsed by
            # ChatPromptTemplate as placeholders — confirm.
            formatted_prompt = self.competitive_analysis_prompt.format(content=content[:5000])

            prompt_template = ChatPromptTemplate.from_messages([
                ("system", formatted_prompt),
                ("user", "Perform the competitive analysis and provide optimization recommendations.")
            ])

            chain = prompt_template | self.llm
            result = chain.invoke({})

            # Chat models return an object with .content; fall back to str().
            result_content = result.content if hasattr(result, 'content') else str(result)
            parsed_result = self._parse_optimization_result(result_content)

            parsed_result.update({
                'optimization_type': 'competitive',
                'competitive_analysis': True
            })

            return parsed_result

        except Exception as e:
            return {'error': f"Competitive optimization failed: {str(e)}"}
213
+
214
    def batch_optimize_content(self, content_list: List[str], optimization_type: str = "standard") -> List[Dict[str, Any]]:
        """
        Optimize multiple pieces of content in batch

        Args:
            content_list (List[str]): List of content pieces to optimize
            optimization_type (str): Type of optimization to apply

        Returns:
            List[Dict]: List of optimization results, one per input; a failed
            item contributes an ``{'batch_index': i, 'error': ...}`` entry
            instead of aborting the batch
        """
        results = []

        for i, content in enumerate(content_list):
            try:
                result = self.optimize_content(
                    content,
                    optimization_type=optimization_type
                )
                # Tag each result with its position in the input list.
                result['batch_index'] = i
                results.append(result)

            except Exception as e:
                # Keep going: one bad document must not sink the batch.
                results.append({
                    'batch_index': i,
                    'error': f"Batch optimization failed: {str(e)}"
                })

        return results
243
+
244
    def generate_content_variations(self, content: str, num_variations: int = 3) -> List[Dict[str, Any]]:
        """
        Generate multiple optimized variations of the same content

        Args:
            content (str): Original content
            num_variations (int): Number of variations to generate (capped at
                the three built-in variation styles below)

        Returns:
            List[Dict]: List of content variations with analysis; a failed
            variation contributes an ``{'variation_index': i, 'error': ...}``
            entry instead of aborting the loop
        """
        variations = []

        # One prompt per variation style; the loop below is capped by this list.
        variation_prompts = [
            "Create a more conversational version optimized for AI chat responses",
            "Create a more authoritative version optimized for citations",
            "Create a more structured version optimized for question-answering"
        ]

        for i in range(min(num_variations, len(variation_prompts))):
            try:
                # {{ }} in this f-string produce literal braces for the JSON
                # example shown to the model.
                # NOTE(review): the f-string embeds user content directly into
                # the system template; braces inside `content` may be parsed
                # as template variables by ChatPromptTemplate — confirm.
                custom_prompt = f"""You are optimizing content for AI systems. {variation_prompts[i]}.

Original content: {content[:4000]}

Provide the optimized variation in JSON format:
```json
{{
    "variation_type": "conversational/authoritative/structured",
    "optimized_content": "the rewritten content...",
    "key_changes": ["change 1", "change 2"],
    "target_use_case": "description of ideal use case"
}}
```"""

                prompt_template = ChatPromptTemplate.from_messages([
                    ("system", custom_prompt),
                    ("user", "Generate the variation.")
                ])

                chain = prompt_template | self.llm
                result = chain.invoke({})

                # Chat models return an object with .content; fall back to str().
                result_content = result.content if hasattr(result, 'content') else str(result)
                parsed_result = self._parse_optimization_result(result_content)

                parsed_result.update({
                    'variation_index': i,
                    'variation_prompt': variation_prompts[i]
                })

                variations.append(parsed_result)

            except Exception as e:
                variations.append({
                    'variation_index': i,
                    'error': f"Variation generation failed: {str(e)}"
                })

        return variations
304
+
305
    def analyze_content_readability(self, content: str) -> Dict[str, Any]:
        """
        Analyze content readability for AI systems

        Args:
            content (str): Content to analyze

        Returns:
            Dict: Readability analysis results (basic metrics, complexity
            indicators, an AI readability score, and recommendations), or
            ``{'error': ...}`` on failure
        """
        try:
            # Basic readability metrics
            words = content.split()
            # Sentence boundaries: runs of ./!/?; drop empty fragments.
            sentences = re.split(r'[.!?]+', content)
            sentences = [s.strip() for s in sentences if s.strip()]

            # Paragraphs are blank-line separated blocks.
            paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]

            # Calculate metrics (guarded against empty input)
            avg_words_per_sentence = len(words) / len(sentences) if sentences else 0
            avg_sentences_per_paragraph = len(sentences) / len(paragraphs) if paragraphs else 0

            # Character-based metrics
            avg_word_length = sum(len(word) for word in words) / len(words) if words else 0

            # Complexity indicators: sentences over 20 words, words over 6 chars
            long_sentences = [s for s in sentences if len(s.split()) > 20]
            complex_words = [w for w in words if len(w) > 6]

            return {
                'basic_metrics': {
                    'total_words': len(words),
                    'total_sentences': len(sentences),
                    'total_paragraphs': len(paragraphs),
                    'avg_words_per_sentence': avg_words_per_sentence,
                    'avg_sentences_per_paragraph': avg_sentences_per_paragraph,
                    'avg_word_length': avg_word_length
                },
                'complexity_indicators': {
                    'long_sentences_count': len(long_sentences),
                    'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
                    'complex_words_count': len(complex_words),
                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
                },
                'ai_readability_score': self._calculate_ai_readability_score({
                    'avg_words_per_sentence': avg_words_per_sentence,
                    'avg_word_length': avg_word_length,
                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
                }),
                'recommendations': self._generate_readability_recommendations({
                    'avg_words_per_sentence': avg_words_per_sentence,
                    'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
                })
            }

        except Exception as e:
            return {'error': f"Readability analysis failed: {str(e)}"}
363
+
364
+ def extract_key_entities(self, content: str) -> Dict[str, Any]:
365
+ """
366
+ Extract key entities and topics for optimization
367
+
368
+ Args:
369
+ content (str): Content to analyze
370
+
371
+ Returns:
372
+ Dict: Extracted entities and topics
373
+ """
374
+ try:
375
+ entity_prompt = """Extract key entities, topics, and concepts from this content for AI optimization.
376
+
377
+ Content: {content}
378
+
379
+ Identify:
380
+ 1. Named entities (people, places, organizations)
381
+ 2. Key concepts and topics
382
+ 3. Technical terms and jargon
383
+ 4. Potential semantic keywords
384
+ 5. Question-answer opportunities
385
+
386
+ Format as JSON:
387
+ ```json
388
+ {{
389
+ "named_entities": ["entity1", "entity2"],
390
+ "key_topics": ["topic1", "topic2"],
391
+ "technical_terms": ["term1", "term2"],
392
+ "semantic_keywords": ["keyword1", "keyword2"],
393
+ "question_opportunities": ["What is...", "How does..."],
394
+ "entity_relationships": ["relationship descriptions"]
395
+ }}
396
+ ```"""
397
+
398
+ prompt_template = ChatPromptTemplate.from_messages([
399
+ ("system", entity_prompt.format(content=content[:5000])),
400
+ ("user", "Extract the entities and topics.")
401
+ ])
402
+
403
+ chain = prompt_template | self.llm
404
+ result = chain.invoke({})
405
+
406
+ result_content = result.content if hasattr(result, 'content') else str(result)
407
+ return self._parse_optimization_result(result_content)
408
+
409
+ except Exception as e:
410
+ return {'error': f"Entity extraction failed: {str(e)}"}
411
+
412
+ def optimize_for_voice_search(self, content: str) -> Dict[str, Any]:
413
+ """
414
+ Optimize content specifically for voice search and conversational AI
415
+
416
+ Args:
417
+ content (str): Content to optimize
418
+
419
+ Returns:
420
+ Dict: Voice search optimization results
421
+ """
422
+ try:
423
+ voice_prompt = """Optimize this content for voice search and conversational AI systems.
424
+
425
+ Focus on:
426
+ 1. Natural language patterns
427
+ 2. Question-based structure
428
+ 3. Conversational tone
429
+ 4. Clear, direct answers
430
+ 5. Featured snippet optimization
431
+
432
+ Original content: {content}
433
+
434
+ Provide optimization in JSON:
435
+ ```json
436
+ {{
437
+ "voice_optimized_content": "conversational version...",
438
+ "question_answer_pairs": [
439
+ {{"question": "What is...", "answer": "Direct answer..."}},
440
+ {{"question": "How does...", "answer": "Step by step..."}}
441
+ ],
442
+ "featured_snippet_candidates": ["snippet 1", "snippet 2"],
443
+ "natural_language_improvements": ["improvement 1", "improvement 2"],
444
+ "conversational_score": 8.5
445
+ }}
446
+ ```"""
447
+
448
+ prompt_template = ChatPromptTemplate.from_messages([
449
+ ("system", voice_prompt.format(content=content[:4000])),
450
+ ("user", "Optimize for voice search.")
451
+ ])
452
+
453
+ chain = prompt_template | self.llm
454
+ result = chain.invoke({})
455
+
456
+ result_content = result.content if hasattr(result, 'content') else str(result)
457
+ parsed_result = self._parse_optimization_result(result_content)
458
+
459
+ parsed_result.update({
460
+ 'optimization_type': 'voice_search',
461
+ 'voice_optimized': True
462
+ })
463
+
464
+ return parsed_result
465
+
466
+ except Exception as e:
467
+ return {'error': f"Voice search optimization failed: {str(e)}"}
468
+
469
+ def _parse_optimization_result(self, response_text: str) -> Dict[str, Any]:
470
+ """Parse LLM response and extract structured results"""
471
+ try:
472
+ # Find JSON content in the response
473
+ json_start = response_text.find('{')
474
+ json_end = response_text.rfind('}') + 1
475
+
476
+ if json_start != -1 and json_end != -1:
477
+ json_str = response_text[json_start:json_end]
478
+ parsed = json.loads(json_str)
479
+
480
+ # Ensure consistent structure
481
+ if 'scores' not in parsed and 'score' in parsed:
482
+ parsed['scores'] = parsed['score']
483
+
484
+ return parsed
485
+ else:
486
+ # If no JSON found, return raw response with error flag
487
+ return {
488
+ 'raw_response': response_text,
489
+ 'parsing_error': 'No JSON structure found in response',
490
+ 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
491
+ }
492
+
493
+ except json.JSONDecodeError as e:
494
+ return {
495
+ 'raw_response': response_text,
496
+ 'parsing_error': f'JSON decode error: {str(e)}',
497
+ 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
498
+ }
499
+ except Exception as e:
500
+ return {
501
+ 'raw_response': response_text,
502
+ 'parsing_error': f'Unexpected parsing error: {str(e)}',
503
+ 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
504
+ }
505
+
506
+ def _calculate_ai_readability_score(self, metrics: Dict[str, float]) -> float:
507
+ """Calculate AI-specific readability score"""
508
+ try:
509
+ # Optimal ranges for AI consumption
510
+ optimal_words_per_sentence = 15 # Sweet spot for AI processing
511
+ optimal_word_length = 5 # Balance of complexity and clarity
512
+ optimal_complex_words_percentage = 15 # Some complexity is good for authority
513
+
514
+ # Calculate deviations from optimal
515
+ sentence_score = max(0, 10 - abs(metrics['avg_words_per_sentence'] - optimal_words_per_sentence) * 0.5)
516
+ word_length_score = max(0, 10 - abs(metrics['avg_word_length'] - optimal_word_length) * 2)
517
+ complexity_score = max(0, 10 - abs(metrics['complex_words_percentage'] - optimal_complex_words_percentage) * 0.3)
518
+
519
+ # Weighted average
520
+ overall_score = (sentence_score * 0.4 + word_length_score * 0.3 + complexity_score * 0.3)
521
+
522
+ return round(overall_score, 1)
523
+
524
+ except Exception:
525
+ return 5.0 # Default neutral score
526
+
527
+ def _generate_readability_recommendations(self, metrics: Dict[str, float]) -> List[str]:
528
+ """Generate specific readability improvement recommendations"""
529
+ recommendations = []
530
+
531
+ try:
532
+ if metrics['avg_words_per_sentence'] > 20:
533
+ recommendations.append("Break down long sentences for better AI processing")
534
+ elif metrics['avg_words_per_sentence'] < 8:
535
+ recommendations.append("Consider combining very short sentences for better context")
536
+
537
+ if metrics['long_sentences_percentage'] > 30:
538
+ recommendations.append("Reduce the number of complex sentences (>20 words)")
539
+
540
+ if metrics['complex_words_percentage'] > 25:
541
+ recommendations.append("Simplify vocabulary where possible for broader accessibility")
542
+ elif metrics['complex_words_percentage'] < 5:
543
+ recommendations.append("Add more specific terminology to establish authority")
544
+
545
+ return recommendations
546
+
547
+ except Exception:
 
 
 
 
 
 
 
 
 
 
548
  return ["Unable to generate specific recommendations"]