Files changed (4) hide show
  1. app.py +115 -588
  2. requirements.txt +5 -1
  3. utils/lang_utils.py +14 -0
  4. utils/optimizer.py +500 -292
app.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- Main Streamlit Application - GEO SEO AI Optimizer with RAG-Enhanced Content Optimization
3
  Entry point for the application with UI components
4
  """
5
 
@@ -8,20 +8,17 @@ import os
8
  import tempfile
9
  import json
10
  from typing import Dict, Any, List
11
- import time
12
 
13
  # Import our custom modules
14
  from utils.parser import PDFParser, TextParser, WebpageParser
15
  from utils.scorer import GEOScorer
16
- from utils.optimizer import ContentOptimizer # This will be your enhanced version
17
  from utils.chunker import VectorChunker
18
  from utils.export import ResultExporter
19
 
20
  # Import LangChain components
21
  from langchain_groq import ChatGroq
22
  from langchain_community.embeddings import HuggingFaceEmbeddings
23
- from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
24
- from langchain_core.messages import AIMessage, HumanMessage
25
 
26
  class GEOSEOApp:
27
  """Main application class that orchestrates all components"""
@@ -44,16 +41,13 @@ class GEOSEOApp:
44
  """Initialize LLM and embedding models"""
45
  self.llm = ChatGroq(
46
  api_key=self.groq_api_key,
47
- model_name="llama-3.1-8b-instant",
48
  temperature=0.1
49
  )
50
 
51
  self.embeddings = HuggingFaceEmbeddings(
52
- model_name="sentence-transformers/all-MiniLM-L6-v2",
53
- model_kwargs={"device": "cpu"}
54
- # model_name="sentence-transformers/all-MiniLM-L6-v2",
55
- # model_kwargs={"device": "cpu"},
56
- # cache_folder="./hf_caches",
57
  )
58
 
59
  def setup_parsers(self):
@@ -63,13 +57,10 @@ class GEOSEOApp:
63
  self.webpage_parser = WebpageParser()
64
 
65
  def setup_components(self):
66
- """Initialize processing components with RAG integration"""
67
  self.geo_scorer = GEOScorer(self.llm)
 
68
  self.vector_chunker = VectorChunker(self.embeddings)
69
-
70
- # Enhanced content optimizer with RAG capabilities
71
- self.content_optimizer = ContentOptimizer(self.llm, self.vector_chunker)
72
-
73
  self.result_exporter = ResultExporter()
74
 
75
  def run(self):
@@ -81,39 +72,39 @@ class GEOSEOApp:
81
  )
82
 
83
  st.title("🚀 GEO SEO AI Optimizer")
84
- st.markdown("*Optimize your content for AI search engines and LLM systems with RAG-enhanced analysis*")
85
 
86
  # Sidebar
87
  self.render_sidebar()
88
 
89
  # Main tabs
90
- tab1, tab2, tab3, tab4 = st.tabs([
91
  "🌐 Website GEO Analysis",
92
- "🔧 GEO Content Enhancement",
93
- "📄 Document Q&A",
94
- "🧠 Generate GEO Content",
95
  ])
96
 
97
  with tab1:
98
  self.render_website_analysis_tab()
99
 
100
  with tab2:
101
- self.render_geo_content_enhancement_tab()
102
 
103
  with tab3:
104
  self.render_document_qa_tab()
105
- with tab4:
106
- self.render_generate_geo_content_tab()
107
-
108
 
109
  def render_sidebar(self):
110
  """Render sidebar with information and controls"""
111
  st.sidebar.title("🛠️ GEO Tools")
 
 
112
  st.sidebar.markdown("- 🌐 Website GEO Analysis")
113
- st.sidebar.markdown("- 🔧 RAG-Enhanced Content Optimization")
114
  st.sidebar.markdown("- 📊 AI-First SEO Scoring")
115
- st.sidebar.markdown("- 📄 Document Q&A with RAG")
116
- st.sidebar.markdown("- 🧠 Generate GEO Content")
 
 
 
117
 
118
  st.sidebar.markdown("---")
119
  st.sidebar.markdown("### 📖 GEO Metrics")
@@ -121,528 +112,14 @@ class GEOSEOApp:
121
  st.sidebar.markdown("**Query Intent Matching**: How well content matches user queries")
122
  st.sidebar.markdown("**Conversational Readiness**: Suitability for AI chat responses")
123
  st.sidebar.markdown("**Citation Worthiness**: Probability of being cited by AI")
124
- st.sidebar.markdown("**Context Completeness**: How self-contained the content is")
125
- st.sidebar.markdown("**Semantic Richness**: Depth of topic coverage")
126
 
127
  st.sidebar.markdown("---")
128
- st.sidebar.markdown("### 🧠 RAG Enhancement")
129
- st.sidebar.markdown("- **Knowledge Base**: GEO best practices")
130
- st.sidebar.markdown("- **Contextual Analysis**: AI-informed optimization")
131
- st.sidebar.markdown("- **Entity Extraction**: AI-powered entity recognition")
132
- st.sidebar.markdown("- **Competitive Analysis**: Gap identification")
133
-
134
- def render_geo_content_enhancement_tab(self):
135
- """Render GEO Content Enhancement tab with RAG integration"""
136
- st.header("🔧 GEO Content Enhancement with RAG")
137
- st.markdown("Analyze and optimize your content using AI-powered Generative Engine Optimization with RAG-enhanced knowledge base.")
138
-
139
- # Content input
140
- input_text = st.text_area(
141
- "Enter content to analyze and enhance:",
142
- height=200,
143
- key="geo_enhancement_input",
144
- help="Paste your content here for GEO optimization using RAG-enhanced analysis"
145
- )
146
-
147
- # GEO Optimization type selector
148
- st.markdown("### ⚙️ GEO Optimization Settings")
149
- col1, col2 = st.columns(2)
150
-
151
- with col1:
152
- optimization_type = st.selectbox(
153
- "Select GEO Optimization Type:",
154
- options=[
155
- "geo_standard",
156
- # "competitive_geo",
157
- # "geo_readability",
158
- # "geo_entity_extraction",
159
- # "geo_variations",
160
- # "geo_batch_optimize"
161
- ],
162
- format_func=lambda x: {
163
- "geo_standard": "🔧 Standard GEO Enhancement",
164
- # "competitive_geo": "📊 Competitive GEO Analysis",
165
- # "geo_readability": "📖 GEO Readability Analysis",
166
- # "geo_entity_extraction": "🏷️ GEO Entity Extraction",
167
- # "geo_variations": "🔄 GEO Content Variations",
168
- # "geo_batch_optimize": "📦 Batch GEO Optimization"
169
- }[x],
170
- index=0,
171
- help="Choose the type of GEO optimization powered by RAG analysis"
172
- )
173
-
174
- with col2:
175
- # Additional options based on optimization type
176
- if optimization_type in ["geo_standard", "competitive_geo"]:
177
- analyze_only = st.checkbox("Analysis", value=True)
178
- include_rag_context = st.checkbox("Include RAG context details", value=True)
179
- # elif optimization_type == "geo_variations":
180
- # num_variations = st.slider("Number of variations", min_value=1, max_value=3, value=2)
181
- # analyze_only = False
182
- # include_rag_context = True
183
- # elif optimization_type == "geo_batch_optimize":
184
- # st.info("For batch optimization, separate multiple content pieces with '---' divider")
185
- # analyze_only = False
186
- # include_rag_context = True
187
- else:
188
- analyze_only = False
189
- include_rag_context = True
190
-
191
- # Show description based on optimization type
192
- optimization_descriptions = {
193
- "geo_standard": "🔧 RAG-enhanced GEO optimization focusing on AI search visibility, conversational readiness, and citation worthiness using knowledge base guidance.",
194
- # "competitive_geo": "📊 Competitive GEO analysis against best practices with gap identification and actionable recommendations using RAG context.",
195
- # "geo_readability": "📖 Detailed readability analysis specifically optimized for AI systems and LLM consumption patterns.",
196
- # "geo_entity_extraction": "🏷️ AI-powered extraction of key entities, topics, and concepts relevant for GEO optimization.",
197
- # "geo_variations": "🔄 Generate multiple GEO-optimized variations (FAQ, conversational, authoritative) using RAG knowledge.",
198
- # "geo_batch_optimize": "📦 Process multiple content pieces simultaneously with consistent GEO optimization."
199
- }
200
-
201
- st.info(f"**{optimization_descriptions[optimization_type]}**")
202
-
203
- # Knowledge base status
204
- if hasattr(self.content_optimizer, 'geo_knowledge'):
205
- st.success(f"✅ RAG Knowledge Base Loaded: {len(self.content_optimizer.geo_knowledge)} GEO best practice documents")
206
- else:
207
- st.warning("⚠️ RAG Knowledge Base not available - falling back to standard optimization")
208
-
209
- # Submit button
210
- if st.button("🚀 Process Content with GEO+RAG", key="geo_enhancement_submit"):
211
- if not input_text.strip():
212
- st.warning("Please enter some content to analyze.")
213
- return
214
-
215
- try:
216
- with st.spinner(f"Processing content with {optimization_type} using RAG-enhanced GEO analysis..."):
217
- # Handle different GEO optimization types
218
- if optimization_type == "geo_standard":
219
- result = self.content_optimizer.optimize_content_with_rag(
220
- input_text,
221
- optimization_type="geo_standard",
222
- analyze_only=analyze_only
223
- )
224
-
225
- elif optimization_type == "competitive_geo":
226
- result = self.content_optimizer.optimize_content_with_rag(
227
- input_text,
228
- optimization_type="competitive_geo",
229
- analyze_only=analyze_only
230
- )
231
-
232
- elif optimization_type == "geo_readability":
233
- result = self.content_optimizer.analyze_geo_readability(input_text)
234
-
235
- elif optimization_type == "geo_entity_extraction":
236
- result = self.content_optimizer.extract_geo_entities(input_text)
237
-
238
- elif optimization_type == "geo_variations":
239
- result = self.content_optimizer.generate_geo_variations(
240
- input_text,
241
- num_variations=num_variations
242
- )
243
-
244
- elif optimization_type == "geo_batch_optimize":
245
- # Split content by '---' separator
246
- content_pieces = [piece.strip() for piece in input_text.split('---') if piece.strip()]
247
- if len(content_pieces) > 1:
248
- result = self.content_optimizer.batch_optimize_with_rag(content_pieces)
249
- else:
250
- st.warning("For batch optimization, please separate content pieces with '---'")
251
- return
252
-
253
- if isinstance(result, list):
254
- # Handle list results (variations, batch)
255
- if any(r.get("error") for r in result):
256
- failed_results = [r for r in result if r.get("error")]
257
- st.error(f"Some processing failed: {len(failed_results)} out of {len(result)} items")
258
- else:
259
- st.success("All content processed successfully!")
260
- elif result.get("error"):
261
- st.error(f"Processing failed: {result['error']}")
262
- return
263
- else:
264
- st.success(f"{optimization_type.replace('_', ' ').title()} completed successfully!")
265
-
266
- # Display results based on optimization type
267
- self.display_geo_enhancement_results(result, optimization_type, input_text, include_rag_context)
268
-
269
- except Exception as e:
270
- st.error(f"An error occurred: {str(e)}")
271
-
272
- def display_geo_enhancement_results(self, result, optimization_type, original_text, include_rag_context=True):
273
- """Display results based on GEO optimization type"""
274
-
275
- if optimization_type == "geo_batch_optimize":
276
- self.display_geo_batch_results(result)
277
- elif optimization_type == "geo_variations":
278
- self.display_geo_variation_results(result)
279
- elif optimization_type == "geo_readability":
280
- self.display_geo_readability_results(result)
281
- elif optimization_type == "geo_entity_extraction":
282
- self.display_geo_entity_results(result)
283
- else:
284
- self.display_standard_geo_results(result, optimization_type, include_rag_context)
285
-
286
- # Export functionality
287
- self.display_geo_export_options(result, optimization_type, original_text)
288
-
289
- def display_standard_geo_results(self, result, optimization_type, include_rag_context):
290
- """Display results for standard and competitive GEO optimizations"""
291
- st.markdown("### 📊 GEO Analysis Results")
292
-
293
- # Show GEO scores if available
294
- geo_analysis = result.get("geo_analysis", {})
295
- if geo_analysis:
296
- st.markdown("#### 🎯 GEO Performance Metrics")
297
-
298
- col1, col2, col3 = st.columns(3)
299
- with col1:
300
- current_score = geo_analysis.get("current_geo_score", 0)
301
- st.metric("Overall GEO Score", f"{current_score}/10")
302
-
303
- with col2:
304
- ai_visibility = geo_analysis.get("ai_search_visibility", 0)
305
- st.metric("AI Search Visibility", f"{ai_visibility}/10")
306
-
307
- with col3:
308
- citation_worthy = geo_analysis.get("citation_worthiness", 0)
309
- st.metric("Citation Worthiness", f"{citation_worthy}/10")
310
-
311
- # Second row of metrics
312
- col1, col2, col3 = st.columns(3)
313
- with col1:
314
- query_matching = geo_analysis.get("query_intent_matching", 0)
315
- st.metric("Query Intent Match", f"{query_matching}/10")
316
-
317
- with col2:
318
- conversational = geo_analysis.get("conversational_readiness", 0)
319
- st.metric("Conversational Ready", f"{conversational}/10")
320
-
321
- with col3:
322
- context_complete = geo_analysis.get("context_completeness", 0)
323
- st.metric("Context Complete", f"{context_complete}/10")
324
-
325
- # Show optimization opportunities
326
- opportunities = result.get("optimization_opportunities", [])
327
- if opportunities:
328
- st.markdown("#### 🚀 Optimization Opportunities")
329
-
330
- high_priority = [opp for opp in opportunities if opp.get('priority') == 'high']
331
- medium_priority = [opp for opp in opportunities if opp.get('priority') == 'medium']
332
-
333
- if high_priority:
334
- st.markdown("##### 🔴 High Priority")
335
- for opp in high_priority:
336
- st.write(f"**{opp.get('type', 'Optimization')}**: {opp.get('description', '')}")
337
- if opp.get('expected_impact'):
338
- st.write(f"*Expected Impact: {opp.get('expected_impact')}*")
339
- st.write("---")
340
-
341
- if medium_priority:
342
- st.markdown("##### 🟡 Medium Priority")
343
- for opp in medium_priority:
344
- st.write(f"**{opp.get('type', 'Optimization')}**: {opp.get('description', '')}")
345
- if opp.get('expected_impact'):
346
- st.write(f"*Expected Impact: {opp.get('expected_impact')}*")
347
- st.write("---")
348
-
349
- # Show GEO keywords and entities
350
- geo_keywords = result.get("geo_keywords", {})
351
- if geo_keywords:
352
- st.markdown("#### 🔑 GEO Keywords & Entities")
353
-
354
- col1, col2 = st.columns(2)
355
- with col1:
356
- primary_entities = geo_keywords.get("primary_entities", [])
357
- if primary_entities:
358
- st.write("**Primary Entities:**")
359
- st.write(", ".join(primary_entities))
360
-
361
- semantic_terms = geo_keywords.get("semantic_terms", [])
362
- if semantic_terms:
363
- st.write("**Semantic Terms:**")
364
- st.write(", ".join(semantic_terms))
365
-
366
- with col2:
367
- question_patterns = geo_keywords.get("question_patterns", [])
368
- if question_patterns:
369
- st.write("**Question Patterns:**")
370
- for q in question_patterns:
371
- st.write(f"• {q}")
372
-
373
- related_concepts = geo_keywords.get("related_concepts", [])
374
- if related_concepts:
375
- st.write("**Related Concepts:**")
376
- st.write(", ".join(related_concepts))
377
-
378
- # Show optimized content
379
- optimized_content = result.get("optimized_content", {})
380
- if optimized_content:
381
- enhanced_text = optimized_content.get("enhanced_text", "")
382
- if enhanced_text:
383
- st.markdown("#### ✨ GEO-Optimized Content")
384
- st.text_area(
385
- "Enhanced version:",
386
- value=enhanced_text,
387
- height=250,
388
- key="geo_optimized_output"
389
- )
390
-
391
- # Show structural improvements
392
- structural_improvements = optimized_content.get("structural_improvements", [])
393
- if structural_improvements:
394
- st.markdown("**Structural Improvements:**")
395
- for improvement in structural_improvements:
396
- st.write(f"• {improvement}")
397
-
398
- # Show semantic enhancements
399
- semantic_enhancements = optimized_content.get("semantic_enhancements", [])
400
- if semantic_enhancements:
401
- st.markdown("**Semantic Enhancements:**")
402
- for enhancement in semantic_enhancements:
403
- st.write(f"• {enhancement}")
404
-
405
- # Show competitive analysis if available
406
- if "competitive_gaps" in result:
407
- st.markdown("#### 📊 Competitive GEO Analysis")
408
- competitive_gaps = result["competitive_gaps"]
409
-
410
- col1, col2 = st.columns(2)
411
- with col1:
412
- missing_questions = competitive_gaps.get("missing_question_patterns", [])
413
- if missing_questions:
414
- st.write("**Missing Question Patterns:**")
415
- for q in missing_questions:
416
- st.write(f"• {q}")
417
-
418
- entity_gaps = competitive_gaps.get("entity_gaps", [])
419
- if entity_gaps:
420
- st.write("**Entity Gaps:**")
421
- st.write(", ".join(entity_gaps))
422
-
423
- with col2:
424
- semantic_opportunities = competitive_gaps.get("semantic_opportunities", [])
425
- if semantic_opportunities:
426
- st.write("**Semantic Opportunities:**")
427
- st.write(", ".join(semantic_opportunities))
428
-
429
- structural_weaknesses = competitive_gaps.get("structural_weaknesses", [])
430
- if structural_weaknesses:
431
- st.write("**Structural Weaknesses:**")
432
- for weakness in structural_weaknesses:
433
- st.write(f"• {weakness}")
434
-
435
- # Show recommendations
436
- recommendations = result.get("recommendations", [])
437
- if recommendations:
438
- st.markdown("#### 💡 GEO Recommendations")
439
- for i, rec in enumerate(recommendations, 1):
440
- st.write(f"**{i}.** {rec}")
441
-
442
- # RAG context information
443
- if include_rag_context and result.get("rag_enhanced"):
444
- with st.expander("🧠 RAG Enhancement Details"):
445
- st.write("**RAG Status:** ✅ Knowledge base successfully applied")
446
- st.write(f"**Knowledge Sources:** {result.get('knowledge_sources', 'Multiple')} GEO best practice documents")
447
- st.write(f"**Enhancement Type:** {result.get('optimization_type', 'Standard')}")
448
-
449
- if result.get('parsing_error'):
450
- st.warning(f"**Parsing Note:** {result['parsing_error']}")
451
-
452
- def display_geo_batch_results(self, results):
453
- """Display batch GEO optimization results"""
454
- st.markdown("### 📦 Batch GEO Processing Results")
455
-
456
- successful_results = [r for r in results if not r.get('error')]
457
- failed_results = [r for r in results if r.get('error')]
458
-
459
- col1, col2, col3 = st.columns(3)
460
- with col1:
461
- st.metric("Total Pieces", len(results))
462
- with col2:
463
- st.metric("Successful", len(successful_results))
464
- with col3:
465
- st.metric("Failed", len(failed_results))
466
-
467
- # Show individual results
468
- for result in results:
469
- idx = result.get('batch_index', 0)
470
- st.markdown(f"#### Content Piece {idx + 1}")
471
-
472
- if result.get('error'):
473
- st.error(f"Processing failed: {result['error']}")
474
- else:
475
- # Show GEO scores
476
- geo_analysis = result.get("geo_analysis", {})
477
- if geo_analysis:
478
- col1, col2, col3 = st.columns(3)
479
- with col1:
480
- st.metric("GEO Score", f"{geo_analysis.get('current_geo_score', 0):.1f}")
481
- with col2:
482
- st.metric("AI Visibility", f"{geo_analysis.get('ai_search_visibility', 0):.1f}")
483
- with col3:
484
- st.metric("Citation Worthy", f"{geo_analysis.get('citation_worthiness', 0):.1f}")
485
-
486
- # Show optimized content if available
487
- optimized_content = result.get("optimized_content", {})
488
- enhanced_text = optimized_content.get("enhanced_text", "")
489
- if enhanced_text:
490
- with st.expander("View GEO-optimized content"):
491
- st.text_area("", value=enhanced_text[:500] + "...", height=150, key=f"batch_geo_output_{idx}")
492
-
493
- st.write("---")
494
-
495
- def display_geo_variation_results(self, variations):
496
- """Display GEO content variation results"""
497
- st.markdown("### 🔄 GEO Content Variations")
498
-
499
- for i, variation in enumerate(variations):
500
- if variation.get('error'):
501
- st.error(f"Variation {i+1} failed: {variation['error']}")
502
- continue
503
-
504
- variation_type = variation.get('variation_type', f'Variation {i+1}')
505
- st.markdown(f"#### {variation_type.replace('_', ' ').title()} Version")
506
-
507
- # Show GEO improvements
508
- geo_improvements = variation.get('geo_improvements', [])
509
- if geo_improvements:
510
- st.write("**GEO Improvements:**")
511
- for improvement in geo_improvements:
512
- st.write(f"• {improvement}")
513
-
514
- # Show target AI systems
515
- target_ai_systems = variation.get('target_ai_systems', [])
516
- if target_ai_systems:
517
- st.write(f"**Optimized For:** {', '.join(target_ai_systems)}")
518
-
519
- # Show expected benefits
520
- expected_benefits = variation.get('expected_geo_benefits', [])
521
- if expected_benefits:
522
- st.write("**Expected GEO Benefits:**")
523
- for benefit in expected_benefits:
524
- st.write(f"• {benefit}")
525
-
526
- # Show optimized content
527
- optimized_content = variation.get('optimized_content', '')
528
- if optimized_content:
529
- st.text_area(
530
- f"{variation_type} content:",
531
- value=optimized_content,
532
- height=200,
533
- key=f"geo_variation_{i}"
534
- )
535
-
536
- st.write("---")
537
-
538
- def display_geo_readability_results(self, result):
539
- """Display GEO readability analysis results"""
540
- st.markdown("### 📖 GEO Readability Analysis")
541
-
542
- # Basic GEO metrics
543
- geo_metrics = result.get('geo_readability_metrics', {})
544
- if geo_metrics:
545
- st.markdown("#### 📊 GEO Content Metrics")
546
- col1, col2, col3, col4 = st.columns(4)
547
-
548
- with col1:
549
- st.metric("Total Words", geo_metrics.get('total_words', 0))
550
- with col2:
551
- st.metric("Questions", geo_metrics.get('questions_count', 0))
552
- with col3:
553
- st.metric("Headings", geo_metrics.get('headings_count', 0))
554
- with col4:
555
- st.metric("Lists", geo_metrics.get('lists_count', 0))
556
-
557
- # Second row
558
- col1, col2, col3, col4 = st.columns(4)
559
- with col1:
560
- st.metric("Entity Mentions", geo_metrics.get('entity_mentions', 0))
561
- with col2:
562
- st.metric("Data Points", geo_metrics.get('numeric_data_points', 0))
563
- with col3:
564
- st.metric("Paragraphs", geo_metrics.get('total_paragraphs', 0))
565
- with col4:
566
- geo_score = result.get('geo_readability_score', 0)
567
- st.metric("GEO Readability", f"{geo_score}/10")
568
-
569
- # AI optimization indicators
570
- ai_indicators = result.get('ai_optimization_indicators', {})
571
- if ai_indicators:
572
- st.markdown("#### 🤖 AI Optimization Indicators")
573
- col1, col2 = st.columns(2)
574
-
575
- with col1:
576
- question_ratio = ai_indicators.get('question_ratio', 0)
577
- st.metric("Question Ratio", f"{question_ratio:.2%}")
578
- structure_score = ai_indicators.get('structure_score', 0)
579
- st.metric("Structure Score", f"{structure_score:.1f}/10")
580
-
581
- with col2:
582
- entity_density = ai_indicators.get('entity_density', 0)
583
- st.metric("Entity Density", f"{entity_density:.2%}")
584
- data_richness = ai_indicators.get('data_richness', 0)
585
- st.metric("Data Richness", f"{data_richness:.2%}")
586
-
587
- # GEO recommendations
588
- geo_recommendations = result.get('geo_recommendations', [])
589
- if geo_recommendations:
590
- st.markdown("#### 💡 GEO Optimization Recommendations")
591
- for i, rec in enumerate(geo_recommendations, 1):
592
- st.write(f"**{i}.** {rec}")
593
-
594
- def display_geo_entity_results(self, result):
595
- """Display GEO entity extraction results"""
596
- st.markdown("### 🏷️ GEO Entity Analysis")
597
-
598
- if result.get('error'):
599
- st.error(f"Entity extraction failed: {result['error']}")
600
- return
601
-
602
- geo_entities = result.get('geo_entities', {})
603
- if geo_entities:
604
- # Display extracted entities
605
- for entity_type, entity_data in geo_entities.items():
606
- if entity_data:
607
- st.markdown(f"#### {entity_type.replace('_', ' ').title()}")
608
- st.write(entity_data)
609
- st.write("---")
610
-
611
- # Extraction metadata
612
- extraction_success = result.get('extraction_success', False)
613
- if extraction_success:
614
- st.success("✅ Entity extraction completed successfully")
615
- st.write(f"**Content Length:** {result.get('content_length', 0)} characters")
616
- st.write(f"**Extraction Method:** {result.get('extraction_method', 'Unknown')}")
617
-
618
- def display_geo_export_options(self, result, optimization_type, original_text):
619
- """Display export options for GEO results"""
620
- st.markdown("### 📥 Export GEO Results")
621
-
622
- # Prepare export data
623
- export_data = {
624
- 'timestamp': time.time(),
625
- 'optimization_type': optimization_type,
626
- 'original_text': original_text,
627
- 'original_word_count': len(original_text.split()),
628
- 'geo_results': result,
629
- 'rag_enhanced': result.get('rag_enhanced', False) if not isinstance(result, list) else any(r.get('rag_enhanced', False) for r in result),
630
- 'knowledge_sources': result.get('knowledge_sources', 0) if not isinstance(result, list) else 'multiple'
631
- }
632
-
633
- # Serialize data to JSON
634
- export_json = json.dumps(export_data, indent=2, default=str)
635
-
636
- # Add download button
637
- st.download_button(
638
- label="📥 Download GEO Analysis Report",
639
- data=export_json,
640
- file_name=f"geo_{optimization_type}_analysis_{int(time.time())}.json",
641
- mime="application/json"
642
- )
643
-
644
- # Keep existing methods for other tabs (render_document_qa_tab, render_website_analysis_tab, etc.)
645
- # ... (rest of the methods remain the same as in your original code)
646
 
647
  def render_document_qa_tab(self):
648
  """Render Document Q&A tab"""
@@ -705,6 +182,96 @@ class GEOSEOApp:
705
  except Exception as e:
706
  st.error(f"An error occurred: {str(e)}")
707
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
708
  def render_website_analysis_tab(self):
709
  """Render Website GEO Analysis tab"""
710
  st.header("🌐 Website GEO Analysis")
@@ -932,46 +499,6 @@ class GEOSEOApp:
932
  tmp_file.write(uploaded_file.read())
933
  return tmp_file.name
934
 
935
- def render_generate_geo_content_tab(self):
936
- """Tab to generate fresh GEO-optimized content using system prompts"""
937
- st.header("🧠 Generate GEO Content")
938
- st.markdown("Use this tool to generate AI-optimized content from scratch based on your topic or query.")
939
-
940
- # User input
941
- user_prompt = st.text_area("Describe the content you want (e.g., topic, style, target audience):", height=150)
942
-
943
- # Continue chat option
944
- if "chat_history" not in st.session_state:
945
- st.session_state.chat_history = []
946
-
947
- if st.button("🧠 Generate Content"):
948
- if not user_prompt.strip():
949
- st.warning("Please enter a topic or description.")
950
- return
951
-
952
- # Add user message to chat history
953
- st.session_state.chat_history.append(HumanMessage(content=user_prompt))
954
-
955
- # Define system prompt for GEO content generation
956
- system_prompt = (
957
- "You are a Generative Engine Optimization (GEO) content creation specialist. "
958
- "Create content that is highly optimized for AI systems, LLMs, and generative search engines. "
959
- "Ensure the content includes rich semantics, clear structure, relevant keywords, and is suitable for conversational use, citations, and AI summaries."
960
- )
961
- st.session_state.chat_history.insert(0, SystemMessagePromptTemplate.from_template(system_prompt).format())
962
-
963
- with st.spinner("Generating GEO-optimized content..."):
964
- response = self.llm.invoke(st.session_state.chat_history)
965
- st.session_state.chat_history.append(AIMessage(content=response.content))
966
- st.success("✅ Content generated successfully!")
967
-
968
- # Display chat history
969
- for msg in st.session_state.chat_history:
970
- if isinstance(msg, HumanMessage):
971
- st.markdown(f"**🧑 You:** {msg.content}")
972
- elif isinstance(msg, AIMessage):
973
- st.markdown(f"**🤖 Assistant:** {msg.content}")
974
-
975
 
976
  def main():
977
  """Main entry point"""
 
1
  """
2
+ Main Streamlit Application - GEO SEO AI Optimizer
3
  Entry point for the application with UI components
4
  """
5
 
 
8
  import tempfile
9
  import json
10
  from typing import Dict, Any, List
 
11
 
12
  # Import our custom modules
13
  from utils.parser import PDFParser, TextParser, WebpageParser
14
  from utils.scorer import GEOScorer
15
+ from utils.optimizer import ContentOptimizer
16
  from utils.chunker import VectorChunker
17
  from utils.export import ResultExporter
18
 
19
  # Import LangChain components
20
  from langchain_groq import ChatGroq
21
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
 
22
 
23
  class GEOSEOApp:
24
  """Main application class that orchestrates all components"""
 
41
  """Initialize LLM and embedding models"""
42
  self.llm = ChatGroq(
43
  api_key=self.groq_api_key,
44
+ model_name="llama3-8b-8192",
45
  temperature=0.1
46
  )
47
 
48
  self.embeddings = HuggingFaceEmbeddings(
49
+ model_name="sentence-transformers/all-MiniLM-L6-v2",
50
+ cache_folder="./hf_cache",
 
 
 
51
  )
52
 
53
  def setup_parsers(self):
 
57
  self.webpage_parser = WebpageParser()
58
 
59
  def setup_components(self):
60
+ """Initialize processing components"""
61
  self.geo_scorer = GEOScorer(self.llm)
62
+ self.content_optimizer = ContentOptimizer(self.llm)
63
  self.vector_chunker = VectorChunker(self.embeddings)
 
 
 
 
64
  self.result_exporter = ResultExporter()
65
 
66
  def run(self):
 
72
  )
73
 
74
  st.title("🚀 GEO SEO AI Optimizer")
75
+ st.markdown("*Optimize your content for AI search engines and LLM systems*")
76
 
77
  # Sidebar
78
  self.render_sidebar()
79
 
80
  # Main tabs
81
+ tab1, tab2, tab3 = st.tabs([
82
  "🌐 Website GEO Analysis",
83
+ "🔧 Content Enhancement",
84
+ "📄 Document Q&A",
 
85
  ])
86
 
87
  with tab1:
88
  self.render_website_analysis_tab()
89
 
90
  with tab2:
91
+ self.render_content_enhancement_tab()
92
 
93
  with tab3:
94
  self.render_document_qa_tab()
 
 
 
95
 
96
  def render_sidebar(self):
97
  """Render sidebar with information and controls"""
98
  st.sidebar.title("🛠️ GEO Tools")
99
+ st.sidebar.markdown("- 📄 Document Q&A with RAG")
100
+ st.sidebar.markdown("- 🔧 Content Enhancement")
101
  st.sidebar.markdown("- 🌐 Website GEO Analysis")
 
102
  st.sidebar.markdown("- 📊 AI-First SEO Scoring")
103
+
104
+ st.sidebar.markdown("---")
105
+ st.sidebar.markdown("### 🔧 Configuration")
106
+ st.sidebar.markdown("Set your API keys:")
107
+ st.sidebar.code("export GROQ_API_KEY='your-key'")
108
 
109
  st.sidebar.markdown("---")
110
  st.sidebar.markdown("### 📖 GEO Metrics")
 
112
  st.sidebar.markdown("**Query Intent Matching**: How well content matches user queries")
113
  st.sidebar.markdown("**Conversational Readiness**: Suitability for AI chat responses")
114
  st.sidebar.markdown("**Citation Worthiness**: Probability of being cited by AI")
 
 
115
 
116
  st.sidebar.markdown("---")
117
+ st.sidebar.markdown("### ℹ️ Components")
118
+ st.sidebar.markdown("- **Parser**: Extract content from various sources")
119
+ st.sidebar.markdown("- **Scorer**: Analyze GEO performance")
120
+ st.sidebar.markdown("- **Optimizer**: Enhance content for AI")
121
+ st.sidebar.markdown("- **Chunker**: Create vector embeddings")
122
+ st.sidebar.markdown("- **Exporter**: Generate reports")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
  def render_document_qa_tab(self):
125
  """Render Document Q&A tab"""
 
182
  except Exception as e:
183
  st.error(f"An error occurred: {str(e)}")
184
 
185
def render_content_enhancement_tab(self):
    """Render the Content Enhancement tab.

    Collects a block of text and two options from the user. When the submit
    button is pressed, the text is passed to
    ``self.content_optimizer.optimize_content`` and the returned scores,
    keywords and optimized text are rendered, followed by a JSON download of
    the exported result. Any exception raised while processing is shown to
    the user via ``st.error`` instead of propagating.
    """
    # Local import: the export file name needs a timestamp, and the
    # module-level ``import time`` is dropped by this change.
    import time

    st.header("🔧 Content Enhancement")
    st.markdown("Analyze and optimize your content for better AI/LLM performance.")

    # Content input
    input_text = st.text_area(
        "Enter content to analyze and enhance:",
        height=200,
        key="enhancement_input"
    )

    # Analysis options
    col1, col2 = st.columns(2)
    with col1:
        analyze_only = st.checkbox("Analysis only (no rewriting)", value=False)
    with col2:
        include_keywords = st.checkbox("Include keyword suggestions", value=True)

    # Nothing more to render until the user submits.
    if not st.button("🔧 Analyze & Enhance", key="enhancement_submit"):
        return

    if not input_text.strip():
        st.warning("Please enter some content to analyze.")
        return

    try:
        with st.spinner("Analyzing content..."):
            # Run content analysis and optimization
            result = self.content_optimizer.optimize_content(
                input_text,
                analyze_only=analyze_only,
                include_keywords=include_keywords
            )

        if result.get("error"):
            st.error(f"Analysis failed: {result['error']}")
            return

        # NOTE(review): the banner and heading are shown only in
        # analysis-only mode, mirroring the original ``if analyze_only:``.
        # The original indentation was not recoverable, so confirm whether
        # the sections below were meant to sit inside this branch too.
        if analyze_only:
            st.success("Content analysis and enhancement completed successfully!")
            st.markdown("### 📊 Analysis Results")

        # Scores: one metric per column, defaulting to 0 when a key is absent.
        scores = result.get("scores", {})
        if scores:
            col1, col2, col3 = st.columns(3)

            with col1:
                clarity = scores.get("clarity", 0)
                st.metric("Clarity", f"{clarity}/10")

            with col2:
                structure = scores.get("structuredness", 0)
                st.metric("Structure", f"{structure}/10")

            with col3:
                answerability = scores.get("answerability", 0)
                st.metric("Answerability", f"{answerability}/10")

        # Keywords
        keywords = result.get("keywords", [])
        if keywords:
            st.markdown("#### 🔑 Key Terms")
            st.write(", ".join(keywords))

        # Optimized content is shown unconditionally (possibly empty); the
        # original had a guard on this section that was disabled.
        optimized_text = result.get("optimized_text", "")
        st.markdown("#### ✨ Optimized Content")
        st.text_area(
            "Enhanced version:",
            value=optimized_text,
            height=200,
            key="optimized_output"
        )

        # Export. The download button is rendered directly rather than behind
        # a second ``st.button``: a button nested inside another button's
        # branch never runs, because clicking it reruns the script and the
        # outer button is then False.
        # NOTE(review): the export payload is now built on every submit
        # instead of on demand; confirm export_enhancement_results is cheap.
        export_data = self.result_exporter.export_enhancement_results(result)
        st.download_button(
            label="Download Analysis Report",
            data=json.dumps(export_data, indent=2),
            file_name=f"content_analysis_{int(time.time())}.json",
            mime="application/json"
        )

    except Exception as e:
        # UI boundary: report the failure in the page rather than crash it.
        st.error(f"An error occurred: {str(e)}")
274
+
275
  def render_website_analysis_tab(self):
276
  """Render Website GEO Analysis tab"""
277
  st.header("🌐 Website GEO Analysis")
 
499
  tmp_file.write(uploaded_file.read())
500
  return tmp_file.name
501
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
 
503
  def main():
504
  """Main entry point"""
requirements.txt CHANGED
@@ -13,4 +13,8 @@ requests
13
  numpy
14
  pandas
15
  openpyxl
16
- torch
 
 
 
 
 
13
  numpy
14
  pandas
15
  openpyxl
16
+ torch
17
+ langdetect
18
+ transformers
19
+ sentencepiece
20
+ openai-whisper
utils/lang_utils.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langdetect import detect
2
+ from transformers import pipeline
3
+
4
+ # Detect language of the input text
5
def detect_language(text: str) -> str:
    """Return the language code that ``langdetect.detect`` reports for *text*.

    Args:
        text: Text whose language should be identified.

    Returns:
        The value returned by ``detect(text)``, or ``"unknown"`` when the
        input is not a non-blank string or detection raises.
    """
    # Blank or non-string input cannot be classified; skip the detector call.
    if not isinstance(text, str) or not text.strip():
        return "unknown"
    try:
        return detect(text)
    except Exception:
        # Best-effort lookup: any detector failure maps to "unknown". Unlike a
        # bare ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return "unknown"
10
+
11
+ # Translate text to English (or another target language)
12
def translate_text(text: str, target_lang: str = "en") -> str:
    """Translate *text* with the ``Helsinki-NLP/opus-mt-mul-en`` pipeline.

    Args:
        text: Text to translate.
        target_lang: Kept for interface compatibility. It is not forwarded to
            the pipeline and the model name is fixed, so it currently has no
            effect on the output.

    Returns:
        The ``translation_text`` field of the first pipeline result. Empty or
        whitespace-only input is returned unchanged without loading the model.
    """
    if not text or not text.strip():
        return text

    # Building the pipeline is expensive, so create it once and keep it on the
    # function object for later calls instead of rebuilding it every time.
    translator = getattr(translate_text, "_translator", None)
    if translator is None:
        translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
        translate_text._translator = translator

    return translator(text)[0]["translation_text"]
utils/optimizer.py CHANGED
@@ -1,354 +1,562 @@
1
- # Enhanced Content Optimization Module with RAG for GEO
2
- # Integrates RAG functionality for better Generative Engine Optimization
 
 
3
 
4
  import json
5
  import re
6
  from typing import Dict, Any, List, Optional
7
  from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
8
- from langchain.schema import Document
9
 
10
 
11
  class ContentOptimizer:
12
- """Enhanced Content Optimizer with RAG capabilities for GEO"""
13
 
14
- def __init__(self, llm, vector_chunker=None):
15
  self.llm = llm
16
- self.vector_chunker = vector_chunker
17
  self.setup_prompts()
18
- self.setup_geo_knowledge_base()
19
-
20
- def setup_geo_knowledge_base(self):
21
- """Initialize GEO best practices knowledge base"""
22
- self.geo_knowledge = [
23
- """
24
- Generative Engine Optimization (GEO) Best Practices:
25
-
26
- 1. Structure for AI Consumption:
27
- - Use clear headings and subheadings
28
- - Include bullet points and numbered lists
29
- - Provide direct, concise answers to common questions
30
- - Use schema markup when possible
31
-
32
- 2. Content Format for LLMs:
33
- - Answer questions directly in the first sentence
34
- - Use "what, why, how" question patterns
35
- - Include relevant entities and proper nouns
36
- - Maintain factual accuracy with citations
37
-
38
- 3. Semantic Optimization:
39
- - Include related terms and synonyms
40
- - Use entity-rich content (people, places, organizations)
41
- - Connect concepts with clear relationships
42
- - Optimize for topic clusters, not just keywords
43
- """,
44
-
45
- """
46
- AI Search Visibility Optimization:
47
-
48
- 1. Query Intent Matching:
49
- - Address user intent explicitly
50
- - Use natural language patterns
51
- - Include question-answer pairs
52
- - Optimize for conversational queries
53
-
54
- 2. Citation Worthiness:
55
- - Include authoritative sources and data
56
- - Use specific facts and statistics
57
- - Provide expert opinions and insights
58
- - Maintain consistent tone and expertise
59
-
60
- 3. Multi-Query Coverage:
61
- - Address related questions in the same content
62
- - Use comprehensive topic coverage
63
- - Include long-tail and specific queries
64
- - Provide context for complex topics
65
- """,
66
-
67
- """
68
- Content Structure for AI Systems:
69
-
70
- 1. Information Architecture:
71
- - Lead with key information
72
- - Use inverted pyramid structure
73
- - Include table of contents for long content
74
- - Break complex topics into digestible sections
75
-
76
- 2. Conversational Readiness:
77
- - Write in active voice
78
- - Use clear, direct language
79
- - Include transitional phrases
80
- - Optimize sentence length (12-20 words)
81
-
82
- 3. Context Completeness:
83
- - Define technical terms
84
- - Provide background information
85
- - Include relevant examples
86
- - Connect to broader topic context
87
- """
88
- ]
89
 
90
  def setup_prompts(self):
91
- """Initialize optimization prompts with RAG integration"""
92
- self.rag_enhancement_prompt = """
93
- You are a Generative Engine Optimization (GEO) specialist with access to best practices knowledge.
94
-
95
- Based on the provided GEO knowledge and the user's content, optimize the content for:
96
- 1. AI search engines (ChatGPT, Claude, Gemini)
97
- 2. LLM-based question answering systems
98
- 3. Conversational AI interfaces
99
- 4. Citation and reference systems
100
-
101
- Use the knowledge base to inform your optimization decisions.
102
-
103
- Knowledge Base Context:
104
- {context}
105
-
106
- Original Content:
107
- {content}
108
-
109
- Provide comprehensive GEO optimization in JSON format:
110
- ```json
111
- {{
112
- "geo_analysis": {{
113
- "current_geo_score": 7.5,
114
- "ai_search_visibility": 8.0,
115
- "query_intent_matching": 7.0,
116
- "conversational_readiness": 8.5,
117
- "citation_worthiness": 6.5,
118
- "context_completeness": 7.5
119
- }},
120
- "optimization_opportunities": [
121
- {{
122
- "type": "Structure Enhancement",
123
- "description": "Add clear headings and Q&A format",
124
- "priority": "high",
125
- "expected_impact": "Improve AI parsing by 25%"
126
- }}
127
- ],
128
- "optimized_content": {{
129
- "enhanced_text": "Your optimized content here...",
130
- "structural_improvements": ["Added FAQ section", "Improved headings"],
131
- "semantic_enhancements": ["Added related terms", "Improved entity density"]
132
- }},
133
- "geo_keywords": {{
134
- "primary_entities": ["entity1", "entity2"],
135
- "semantic_terms": ["term1", "term2"],
136
- "question_patterns": ["What is...", "How does..."],
137
- "related_concepts": ["concept1", "concept2"]
138
- }},
139
- "recommendations": [
140
- "Add more specific examples",
141
- "Include authoritative citations",
142
- "Improve conversational flow"
143
- ]
144
- }}
145
- ```
146
- """.strip()
147
-
148
- self.competitive_geo_prompt = """
149
- Analyze the content against GEO best practices and identify competitive optimization opportunities.
150
-
151
- GEO Knowledge Base:
152
- {context}
153
 
154
- Content to Analyze:
155
- {content}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
- Provide competitive GEO analysis:
158
- ```json
159
- {{
160
- "competitive_gaps": {{
161
- "missing_question_patterns": ["What questions aren't covered"],
162
- "entity_gaps": ["Important entities not mentioned"],
163
- "semantic_opportunities": ["Related terms to include"],
164
- "structural_weaknesses": ["Formatting issues for AI"]
165
- }},
166
- "benchmark_comparison": {{
167
- "current_performance": {{
168
- "ai_answerability": 6.5,
169
- "semantic_richness": 7.0,
170
- "structural_clarity": 8.0
171
- }},
172
- "optimization_potential": {{
173
- "ai_answerability": 9.0,
174
- "semantic_richness": 8.5,
175
- "structural_clarity": 9.5
176
- }}
177
- }},
178
- "action_plan": [
179
- {{
180
- "priority": "high",
181
- "action": "Add FAQ section",
182
- "rationale": "Improves direct question answering"
183
- }}
184
- ]
185
- }}
186
- ```
187
- """.strip()
188
 
189
-
190
- def optimize_content_with_rag(self, content: str, optimization_type: str = "geo_standard", analyze_only: bool = False) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  try:
192
- knowledge_docs = [Document(page_content=k, metadata={"source": "geo_best_practices"}) for k in self.geo_knowledge]
193
- context = "\n\n".join(self.geo_knowledge)
194
-
195
- if self.vector_chunker:
196
- qa_chain = self.vector_chunker.create_qa_chain(knowledge_docs, self.llm)
197
- geo_query = f"How to optimize this type of content for AI search engines: {content[:500]}"
198
- context_result = qa_chain({"query": geo_query})
199
- context = context_result.get("result", context)
200
-
201
- return self._competitive_geo_optimization(content, context) if optimization_type == "competitive_geo" else self._standard_geo_optimization(content, context, analyze_only)
202
-
203
  except Exception as e:
204
- return {"error": f"RAG-enhanced optimization failed: {str(e)}"}
205
-
206
- def _standard_geo_optimization(self, content: str, context: str, analyze_only: bool) -> Dict[str, Any]:
 
207
  try:
208
- prompt = ChatPromptTemplate.from_messages([
209
- SystemMessagePromptTemplate.from_template(self.rag_enhancement_prompt),
210
- HumanMessagePromptTemplate.from_template("Optimize this content using GEO best practices.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  ])
212
- result = (prompt | self.llm).invoke({"context": context, "content": content[:5000]})
213
- parsed = self._parse_optimization_result(getattr(result, 'content', str(result)))
214
- parsed.update({
215
- 'optimization_type': 'geo_standard',
216
- 'rag_enhanced': True,
 
 
 
 
 
 
 
 
217
  'analyze_only': analyze_only,
218
  'original_length': len(content),
219
- 'knowledge_sources': len(self.geo_knowledge)
220
  })
221
- return parsed
 
 
222
  except Exception as e:
223
- return {"error": f"Standard GEO optimization failed: {str(e)}"}
224
-
225
- def _competitive_geo_optimization(self, content: str, context: str) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  try:
227
- prompt = ChatPromptTemplate.from_messages([
228
- SystemMessagePromptTemplate.from_template(self.competitive_geo_prompt),
229
- HumanMessagePromptTemplate.from_template("Perform competitive GEO analysis.")
 
 
230
  ])
231
- result = (prompt | self.llm).invoke({"context": context, "content": content[:5000]})
232
- parsed = self._parse_optimization_result(getattr(result, 'content', str(result)))
233
- parsed.update({
234
- 'optimization_type': 'competitive_geo',
235
- 'rag_enhanced': True,
 
 
 
 
236
  'competitive_analysis': True
237
  })
238
- return parsed
 
 
239
  except Exception as e:
240
- return {"error": f"Competitive GEO optimization failed: {str(e)}"}
241
-
242
- def batch_optimize_with_rag(self, content_list: List[str], optimization_type: str = "geo_standard") -> List[Dict[str, Any]]:
 
 
 
 
 
 
 
 
 
 
243
  results = []
 
244
  for i, content in enumerate(content_list):
245
  try:
246
- result = self.optimize_content_with_rag(content, optimization_type)
 
 
 
247
  result['batch_index'] = i
248
  results.append(result)
 
249
  except Exception as e:
250
  results.append({
251
  'batch_index': i,
252
- 'error': f"Batch GEO optimization failed: {str(e)}"
253
  })
 
254
  return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
- def analyze_geo_readability(self, content: str) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  try:
 
258
  words = content.split()
259
- sentences = [s.strip() for s in re.split(r'[.!?]+', content) if s.strip()]
 
 
260
  paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]
261
-
262
- metrics = {
263
- 'questions': len(re.findall(r'\?', content)),
264
- 'headings': len(re.findall(r'^#+\s', content, re.MULTILINE)),
265
- 'lists': len(re.findall(r'^\s*[-*+]\s', content, re.MULTILINE)),
266
- 'entities': len(re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', content)),
267
- 'numbers': len(re.findall(r'\b\d+\.?\d*\b', content)),
268
- 'sentence_count': len(sentences),
269
- 'word_count': len(words)
270
- }
271
-
272
- geo_score = self._calculate_geo_readability_score({
273
- 'avg_words_per_sentence': metrics['word_count'] / metrics['sentence_count'] if metrics['sentence_count'] else 0,
274
- 'questions_ratio': metrics['questions'] / metrics['sentence_count'] if metrics['sentence_count'] else 0,
275
- 'structure_elements': metrics['headings'] + metrics['lists'],
276
- 'entity_density': metrics['entities'] / metrics['word_count'] if metrics['word_count'] else 0,
277
- 'numeric_data': metrics['numbers'] / metrics['word_count'] if metrics['word_count'] else 0
278
- })
279
-
280
  return {
281
- 'geo_readability_metrics': metrics,
282
- 'geo_readability_score': geo_score,
283
- 'geo_recommendations': self._generate_geo_recommendations(metrics)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  }
 
285
  except Exception as e:
286
- return {'error': f"GEO readability analysis failed: {str(e)}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
- def _calculate_geo_readability_score(self, m: Dict[str, float]) -> float:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  try:
290
- score = (
291
- max(0, 10 - abs(m['avg_words_per_sentence'] - 15) * 0.3) * 0.2 +
292
- min(10, m['questions_ratio'] * 50) * 0.25 +
293
- min(10, m['structure_elements'] * 1.5) * 0.25 +
294
- min(10, m['entity_density'] * 100) * 0.15 +
295
- min(10, m['numeric_data'] * 200) * 0.15
296
- )
297
- return round(score, 1)
298
- except Exception:
299
- return 5.0
300
 
301
- def _generate_geo_recommendations(self, m: Dict[str, int]) -> List[str]:
302
- r = []
303
- if m['questions'] == 0:
304
- r.append("Add FAQ section or question-based headings.")
305
- if m['headings'] < 2:
306
- r.append("Use more structured headings.")
307
- if m['lists'] == 0:
308
- r.append("Include bullet points or numbered lists.")
309
- if m['entities'] < 5:
310
- r.append("Add named or topical entities.")
311
- if m['questions'] / m['sentence_count'] < 0.1:
312
- r.append("Transform statements into Q&A pairs.")
313
- return r
314
 
315
- def _clean_json_string(self, json_str: str) -> str:
316
- json_str = json_str.replace("...", "")
317
- json_str = re.sub(r",\s*([}\]])", r"\\1", json_str)
318
- json_str = json_str.strip('`')
319
- return json_str
320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
  def _parse_optimization_result(self, response_text: str) -> Dict[str, Any]:
 
322
  try:
323
- start = response_text.find('{')
324
- end = response_text.rfind('}') + 1
325
- if start != -1 and end != -1:
326
- json_str = self._clean_json_string(response_text[start:end])
327
- return json.loads(json_str)
328
- return {
329
- 'raw_response': response_text,
330
- 'parsing_error': 'No JSON structure found in response',
331
- 'geo_analysis': {},
332
- 'recommendations': []
333
- }
 
 
 
 
 
 
 
 
 
 
334
  except json.JSONDecodeError as e:
335
  return {
336
  'raw_response': response_text,
337
  'parsing_error': f'JSON decode error: {str(e)}',
338
- 'geo_analysis': {},
339
- 'recommendations': []
340
  }
341
  except Exception as e:
342
  return {
343
  'raw_response': response_text,
344
- 'parsing_error': f'Unexpected error: {str(e)}',
345
- 'geo_analysis': {},
346
- 'recommendations': []
347
  }
348
-
349
- # Legacy support methods
350
- def optimize_content(self, content: str, analyze_only: bool = False, include_keywords: bool = True, optimization_type: str = "standard") -> Dict[str, Any]:
351
- return self.optimize_content_with_rag(content, optimization_type, analyze_only)
352
-
353
- def analyze_content_readability(self, content: str) -> Dict[str, Any]:
354
- return self.analyze_geo_readability(content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Content Optimization Module
3
+ Enhances content for better AI/LLM performance and GEO scores
4
+ """
5
 
6
  import json
7
  import re
8
  from typing import Dict, Any, List, Optional
9
  from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
 
10
 
11
 
12
  class ContentOptimizer:
13
+ """Main class for optimizing content for AI search engines"""
14
 
15
def __init__(self, llm):
    """Store the language model and build the prompt strings.

    Args:
        llm: Model object that the optimization methods pipe a
            ``ChatPromptTemplate`` into and then invoke.
    """
    self.llm = llm
    # Populates the prompt attributes (enhancement, SEO, competitive).
    self.setup_prompts()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
def setup_prompts(self):
    """Initialize optimization prompts.

    Sets three string attributes on the instance: ``enhancement_prompt``,
    ``seo_style_prompt`` and ``competitive_analysis_prompt``. Literal JSON
    braces are written doubled (``{{`` / ``}}``) so the strings can be used
    as format-style templates; only ``competitive_analysis_prompt`` contains
    a ``{content}`` placeholder.
    """

    # Main content enhancement prompt
    self.enhancement_prompt = (
        "You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.\n\n"
        "Evaluate the input text based on the following criteria, assigning a score from 1-10 for each:\n"
        "- Clarity: How easily can the content be understood?\n"
        "- Structuredness: How well-organized and coherent is the content?\n"
        "- LLM Answerability: How easily can an LLM extract precise answers from the content?\n\n"
        "Identify the most salient keywords.\n\n"
        "Rewrite the text to improve:\n"
        "- Clarity and precision\n"
        "- Logical structure and flow\n"
        "- Suitability for LLM-based information retrieval\n\n"
        "Present your analysis and optimized text in the following JSON format:\n"
        "```json\n"
        "{{\n"
        " \"scores\": {{\n"
        " \"clarity\": 8.5,\n"
        " \"structuredness\": 7.0,\n"
        " \"answerability\": 9.0\n"
        " }},\n"
        " \"keywords\": [\"example\", \"installation\", \"setup\"],\n"
        " \"optimized_text\": \"...\"\n"
        "}}\n"
        "```"
    )

    # SEO-style optimization prompt
    self.seo_style_prompt = (
        "You are an AI-first SEO specialist. Optimize this content for AI search engines and LLM systems. "
        "Focus on:\n"
        "1. Semantic keyword optimization\n"
        "2. Question-answer format enhancement\n"
        "3. Factual accuracy and authority signals\n"
        "4. Conversational readiness\n"
        "5. Citation-worthy structure\n"
        "Provide analysis and optimization in JSON:\n"
        "```json\n"
        "{{\n"
        " \"seo_analysis\": {{\n"
        " \"keyword_density\": \"analysis of current keywords\",\n"
        " \"semantic_gaps\": [\"missing semantic terms\"],\n"
        " \"readability_score\": 8.5,\n"
        " \"authority_signals\": [\"credentials\", \"citations\"]\n"
        " }},\n"
        " \"optimized_content\": {{\n"
        " \"title_suggestions\": [\"optimized title 1\", \"optimized title 2\"],\n"
        " \"meta_description\": \"AI-optimized meta description\",\n"
        " \"enhanced_content\": \"full optimized content...\",\n"
        " \"structured_data_suggestions\": [\"schema markup recommendations\"]\n"
        " }},\n"
        " \"improvement_summary\": {{\n"
        " \"changes_made\": [\"change 1\", \"change 2\"],\n"
        " \"expected_impact\": \"description of expected improvements\"\n"
        " }}\n"
        "}}\n"
        "```"
    )

    # Competitive content analysis prompt; the only one with a {content} slot
    self.competitive_analysis_prompt = (
        "Compare this content against best practices for AI search optimization. Identify gaps and opportunities.\n"
        "Original Content: {content}\n"
        "Analyze against these AI search factors:\n"
        "- Entity recognition and linking\n"
        "- Question coverage completeness\n"
        "- Factual statement clarity\n"
        "- Conversational flow\n"
        "- Semantic relationship mapping\n\n"
        "Provide competitive analysis in JSON format with specific recommendations:\n"
        "{{\n"
        " \"competitive_analysis\": {{\n"
        " \"entity_gaps\": [\"gap1\", \"gap2\"],\n"
        " \"question_coverage\": \"summary of coverage\",\n"
        " \"factual_clarity\": \"assessment\",\n"
        " \"conversational_flow\": \"assessment\",\n"
        " \"semantic_relationships\": [\"relationship1\", \"relationship2\"]\n"
        " }},\n"
        " \"recommendations\": [\"recommendation 1\", \"recommendation 2\"]\n"
        "}}\n"
    )
102
+
103
def optimize_content(self, content: str, analyze_only: bool = False,
                     include_keywords: bool = True, optimization_type: str = "standard") -> Dict[str, Any]:
    """Dispatch *content* to the optimization strategy named by *optimization_type*.

    Args:
        content: Content to optimize.
        analyze_only: Forwarded to the standard and SEO strategies.
        include_keywords: Forwarded to the standard strategy only.
        optimization_type: ``"seo"`` or ``"competitive"`` select those
            strategies; every other value falls back to the standard one.

    Returns:
        The dict produced by the selected strategy, or ``{'error': ...}`` if
        the dispatch itself raises.
    """
    try:
        if optimization_type == "competitive":
            outcome = self._competitive_optimization(content)
        elif optimization_type == "seo":
            outcome = self._seo_style_optimization(content, analyze_only)
        else:
            # Unknown types deliberately share the standard path.
            outcome = self._standard_optimization(content, analyze_only, include_keywords)
    except Exception as exc:
        return {'error': f"Optimization failed: {str(exc)}"}
    return outcome
126
+
127
+ def _standard_optimization(self, content: str, analyze_only: bool, include_keywords: bool) -> Dict[str, Any]:
128
+ """Standard content optimization using enhancement prompt"""
129
  try:
130
+ # Modify prompt based on options
131
+ prompt_text = self.enhancement_prompt
132
+
133
+ if analyze_only:
134
+ prompt_text = prompt_text.replace(
135
+ "Rewrite the text to improve:",
136
+ "Analyze the text for potential improvements in:"
137
+ ).replace(
138
+ '"optimized_text": "..."',
139
+ '"optimization_suggestions": ["suggestion 1", "suggestion 2"]'
140
+ )
141
+
142
+ if not include_keywords:
143
+ prompt_text = prompt_text.replace(
144
+ '"keywords": ["example", "installation", "setup"],',
145
+ ''
146
+ )
147
+
148
+ # Create and run chain
149
+ prompt_template = ChatPromptTemplate.from_messages([
150
+ SystemMessagePromptTemplate.from_template(prompt_text),
151
+ HumanMessagePromptTemplate.from_template(content[:6000]) # Limit content length
152
  ])
153
+ # ("system", prompt_text),
154
+ # ("user", content[:6000]) # Limit content length
155
+
156
+ chain = prompt_template | self.llm
157
+ result = chain.invoke({})
158
+
159
+ # Parse result
160
+ result_content = result.content if hasattr(result, 'content') else str(result)
161
+ parsed_result = self._parse_optimization_result(result_content)
162
+
163
+ # Add metadata
164
+ parsed_result.update({
165
+ 'optimization_type': 'standard',
166
  'analyze_only': analyze_only,
167
  'original_length': len(content),
168
+ 'original_word_count': len(content.split())
169
  })
170
+
171
+ return parsed_result
172
+
173
  except Exception as e:
174
+ return {'error': f"Standard optimization failed: {str(e)}"}
175
+
176
+ def _seo_style_optimization(self, content: str, analyze_only: bool) -> Dict[str, Any]:
177
+ """SEO-focused optimization for AI search engines"""
178
+ try:
179
+ prompt_template = ChatPromptTemplate.from_messages([
180
+ ("system", self.seo_style_prompt),
181
+ ("user", f"Optimize this content for AI search engines:\n\n{content[:6000]}")
182
+ ])
183
+
184
+ chain = prompt_template | self.llm
185
+ result = chain.invoke({})
186
+
187
+ result_content = result.content if hasattr(result, 'content') else str(result)
188
+ parsed_result = self._parse_optimization_result(result_content)
189
+
190
+ # Add SEO-specific metadata
191
+ parsed_result.update({
192
+ 'optimization_type': 'seo',
193
+ 'analyze_only': analyze_only,
194
+ 'seo_focused': True
195
+ })
196
+
197
+ return parsed_result
198
+
199
+ except Exception as e:
200
+ return {'error': f"SEO optimization failed: {str(e)}"}
201
+
202
+ def _competitive_optimization(self, content: str) -> Dict[str, Any]:
203
+ """Competitive analysis-based optimization"""
204
  try:
205
+ formatted_prompt = self.competitive_analysis_prompt.format(content=content[:5000])
206
+
207
+ prompt_template = ChatPromptTemplate.from_messages([
208
+ ("system", formatted_prompt),
209
+ ("user", "Perform the competitive analysis and provide optimization recommendations.")
210
  ])
211
+
212
+ chain = prompt_template | self.llm
213
+ result = chain.invoke({})
214
+
215
+ result_content = result.content if hasattr(result, 'content') else str(result)
216
+ parsed_result = self._parse_optimization_result(result_content)
217
+
218
+ parsed_result.update({
219
+ 'optimization_type': 'competitive',
220
  'competitive_analysis': True
221
  })
222
+
223
+ return parsed_result
224
+
225
  except Exception as e:
226
+ return {'error': f"Competitive optimization failed: {str(e)}"}
227
+
228
def batch_optimize_content(self, content_list: List[str], optimization_type: str = "standard") -> List[Dict[str, Any]]:
    """Optimize each item of *content_list* and collect the results in order.

    Args:
        content_list: Content pieces to optimize.
        optimization_type: Passed through to ``optimize_content``.

    Returns:
        One dict per input item, each tagged with its position under
        ``batch_index``. An item whose processing raises yields a dict with
        ``batch_index`` and ``error`` instead.
    """
    outcomes = []

    for position, piece in enumerate(content_list):
        try:
            entry = self.optimize_content(
                piece,
                optimization_type=optimization_type
            )
            entry['batch_index'] = position
        except Exception as exc:
            # Keep going: one failing item must not abort the batch.
            entry = {
                'batch_index': position,
                'error': f"Batch optimization failed: {str(exc)}"
            }
        outcomes.append(entry)

    return outcomes
257
+
258
def generate_content_variations(self, content: str, num_variations: int = 3) -> List[Dict[str, Any]]:
    """Generate differently-targeted rewrites of the same content.

    Args:
        content: Original content; only the first 4000 characters are sent.
        num_variations: Number of variations wanted. Three variation styles
            are defined, so larger values are capped at three; zero or
            negative values produce an empty list.

    Returns:
        One dict per variation holding the parsed model response plus
        ``variation_index`` and ``variation_prompt``. A variation that fails
        yields a dict with ``variation_index`` and ``error`` instead.
    """
    variations = []

    variation_prompts = [
        "Create a more conversational version optimized for AI chat responses",
        "Create a more authoritative version optimized for citations",
        "Create a more structured version optimized for question-answering"
    ]

    # Real prompt template: {instruction} and {content} are filled in at
    # invoke time, and the JSON example keeps its braces doubled. Formatting
    # the text with an f-string first would leave single braces that the
    # template parser reads as variables.
    system_template = (
        "You are optimizing content for AI systems. {instruction}.\n\n"
        "Original content: {content}\n\n"
        "Provide the optimized variation in JSON format:\n"
        "```json\n"
        "{{\n"
        "    \"variation_type\": \"conversational/authoritative/structured\",\n"
        "    \"optimized_content\": \"the rewritten content...\",\n"
        "    \"key_changes\": [\"change 1\", \"change 2\"],\n"
        "    \"target_use_case\": \"description of ideal use case\"\n"
        "}}\n"
        "```"
    )

    wanted = max(num_variations, 0)
    for i, instruction in enumerate(variation_prompts[:wanted]):
        try:
            prompt_template = ChatPromptTemplate.from_messages([
                ("system", system_template),
                ("user", "Generate the variation.")
            ])

            chain = prompt_template | self.llm
            result = chain.invoke({
                "instruction": instruction,
                "content": content[:4000]
            })

            result_content = result.content if hasattr(result, 'content') else str(result)
            parsed_result = self._parse_optimization_result(result_content)

            parsed_result.update({
                'variation_index': i,
                'variation_prompt': instruction
            })

            variations.append(parsed_result)

        except Exception as e:
            # Record the failure and continue with the remaining styles.
            variations.append({
                'variation_index': i,
                'error': f"Variation generation failed: {str(e)}"
            })

    return variations
318
+
319
def analyze_content_readability(self, content: str) -> Dict[str, Any]:
    """Compute simple text statistics and derive a readability report.

    Args:
        content: Content to analyze.

    Returns:
        A dict with ``basic_metrics``, ``complexity_indicators``,
        ``ai_readability_score`` and ``recommendations``; the last two come
        from the class's scoring and recommendation helpers.
        ``{'error': ...}`` is returned if anything raises.
    """
    try:
        # Tokenize: whitespace words, [.!?]-delimited sentences, and
        # blank-line-separated paragraphs (empty fragments dropped).
        tokens = content.split()
        sentence_list = [frag.strip() for frag in re.split(r'[.!?]+', content) if frag.strip()]
        paragraph_list = [blk.strip() for blk in content.split('\n\n') if blk.strip()]

        word_total = len(tokens)
        sentence_total = len(sentence_list)
        paragraph_total = len(paragraph_list)

        # Averages fall back to 0 when the denominator would be empty.
        words_per_sentence = word_total / sentence_total if sentence_list else 0
        sentences_per_paragraph = sentence_total / paragraph_total if paragraph_list else 0
        mean_word_length = sum(len(tok) for tok in tokens) / word_total if tokens else 0

        # Complexity indicators: sentences over 20 words, words over 6 chars.
        long_total = len([snt for snt in sentence_list if len(snt.split()) > 20])
        complex_total = len([tok for tok in tokens if len(tok) > 6])
        long_pct = long_total / sentence_total * 100 if sentence_list else 0
        complex_pct = complex_total / word_total * 100 if tokens else 0

        basic = {
            'total_words': word_total,
            'total_sentences': sentence_total,
            'total_paragraphs': paragraph_total,
            'avg_words_per_sentence': words_per_sentence,
            'avg_sentences_per_paragraph': sentences_per_paragraph,
            'avg_word_length': mean_word_length
        }
        complexity = {
            'long_sentences_count': long_total,
            'long_sentences_percentage': long_pct,
            'complex_words_count': complex_total,
            'complex_words_percentage': complex_pct
        }
        score = self._calculate_ai_readability_score({
            'avg_words_per_sentence': words_per_sentence,
            'avg_word_length': mean_word_length,
            'complex_words_percentage': complex_pct
        })
        advice = self._generate_readability_recommendations({
            'avg_words_per_sentence': words_per_sentence,
            'long_sentences_percentage': long_pct,
            'complex_words_percentage': complex_pct
        })

        return {
            'basic_metrics': basic,
            'complexity_indicators': complexity,
            'ai_readability_score': score,
            'recommendations': advice
        }

    except Exception as exc:
        return {'error': f"Readability analysis failed: {str(exc)}"}
377
+
378
def extract_key_entities(self, content: str) -> Dict[str, Any]:
    """Ask the model to extract entities, topics and keywords from *content*.

    Args:
        content: Content to analyze; only the first 5000 characters are sent.

    Returns:
        The parsed model response, or ``{'error': ...}`` on failure.
    """
    try:
        excerpt = content[:5000]

        # Kept as a template: {content} is filled at invoke time and the JSON
        # example keeps doubled braces. Calling str.format() on it first would
        # unescape those braces and break template parsing.
        entity_prompt = (
            "Extract key entities, topics, and concepts from this content for AI optimization.\n\n"
            "Content: {content}\n\n"
            "Identify:\n"
            "1. Named entities (people, places, organizations)\n"
            "2. Key concepts and topics\n"
            "3. Technical terms and jargon\n"
            "4. Potential semantic keywords\n"
            "5. Question-answer opportunities\n\n"
            "Format as JSON:\n"
            "```json\n"
            "{{\n"
            "    \"named_entities\": [\"entity1\", \"entity2\"],\n"
            "    \"key_topics\": [\"topic1\", \"topic2\"],\n"
            "    \"technical_terms\": [\"term1\", \"term2\"],\n"
            "    \"semantic_keywords\": [\"keyword1\", \"keyword2\"],\n"
            "    \"question_opportunities\": [\"What is...\", \"How does...\"],\n"
            "    \"entity_relationships\": [\"relationship descriptions\"]\n"
            "}}\n"
            "```"
        )

        prompt_template = ChatPromptTemplate.from_messages([
            ("system", entity_prompt),
            ("user", "Extract the entities and topics.")
        ])

        chain = prompt_template | self.llm
        result = chain.invoke({"content": excerpt})

        result_content = result.content if hasattr(result, 'content') else str(result)
        return self._parse_optimization_result(result_content)

    except Exception as e:
        return {'error': f"Entity extraction failed: {str(e)}"}
425
+
426
def optimize_for_voice_search(self, content: str) -> Dict[str, Any]:
    """Ask the model for a voice-search / conversational rewrite of *content*.

    Args:
        content: Content to optimize; only the first 4000 characters are sent.

    Returns:
        The parsed model response plus ``optimization_type`` and
        ``voice_optimized``; ``{'error': ...}`` on failure.
    """
    try:
        excerpt = content[:4000]

        # Kept as a template: {content} is supplied at invoke time and the
        # JSON example keeps doubled braces, so neither the example nor braces
        # inside the user's content are mistaken for template variables.
        voice_prompt = (
            "Optimize this content for voice search and conversational AI systems.\n\n"
            "Focus on:\n"
            "1. Natural language patterns\n"
            "2. Question-based structure\n"
            "3. Conversational tone\n"
            "4. Clear, direct answers\n"
            "5. Featured snippet optimization\n\n"
            "Original content: {content}\n\n"
            "Provide optimization in JSON:\n"
            "```json\n"
            "{{\n"
            "    \"voice_optimized_content\": \"conversational version...\",\n"
            "    \"question_answer_pairs\": [\n"
            "        {{\"question\": \"What is...\", \"answer\": \"Direct answer...\"}},\n"
            "        {{\"question\": \"How does...\", \"answer\": \"Step by step...\"}}\n"
            "    ],\n"
            "    \"featured_snippet_candidates\": [\"snippet 1\", \"snippet 2\"],\n"
            "    \"natural_language_improvements\": [\"improvement 1\", \"improvement 2\"],\n"
            "    \"conversational_score\": 8.5\n"
            "}}\n"
            "```"
        )

        prompt_template = ChatPromptTemplate.from_messages([
            ("system", voice_prompt),
            ("user", "Optimize for voice search.")
        ])

        chain = prompt_template | self.llm
        result = chain.invoke({"content": excerpt})

        result_content = result.content if hasattr(result, 'content') else str(result)
        parsed_result = self._parse_optimization_result(result_content)

        parsed_result.update({
            'optimization_type': 'voice_search',
            'voice_optimized': True
        })

        return parsed_result

    except Exception as e:
        return {'error': f"Voice search optimization failed: {str(e)}"}
482
+
483
  def _parse_optimization_result(self, response_text: str) -> Dict[str, Any]:
484
+ """Parse LLM response and extract structured results"""
485
  try:
486
+ # Find JSON content in the response
487
+ json_start = response_text.find('{')
488
+ json_end = response_text.rfind('}') + 1
489
+
490
+ if json_start != -1 and json_end != -1:
491
+ json_str = response_text[json_start:json_end]
492
+ parsed = json.loads(json_str)
493
+
494
+ # Ensure consistent structure
495
+ if 'scores' not in parsed and 'score' in parsed:
496
+ parsed['scores'] = parsed['score']
497
+
498
+ return parsed
499
+ else:
500
+ # If no JSON found, return raw response with error flag
501
+ return {
502
+ 'raw_response': response_text,
503
+ 'parsing_error': 'No JSON structure found in response',
504
+ 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
505
+ }
506
+
507
  except json.JSONDecodeError as e:
508
  return {
509
  'raw_response': response_text,
510
  'parsing_error': f'JSON decode error: {str(e)}',
511
+ 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
 
512
  }
513
  except Exception as e:
514
  return {
515
  'raw_response': response_text,
516
+ 'parsing_error': f'Unexpected parsing error: {str(e)}',
517
+ 'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
 
518
  }
519
+
520
+ def _calculate_ai_readability_score(self, metrics: Dict[str, float]) -> float:
521
+ """Calculate AI-specific readability score"""
522
+ try:
523
+ # Optimal ranges for AI consumption
524
+ optimal_words_per_sentence = 15 # Sweet spot for AI processing
525
+ optimal_word_length = 5 # Balance of complexity and clarity
526
+ optimal_complex_words_percentage = 15 # Some complexity is good for authority
527
+
528
+ # Calculate deviations from optimal
529
+ sentence_score = max(0, 10 - abs(metrics['avg_words_per_sentence'] - optimal_words_per_sentence) * 0.5)
530
+ word_length_score = max(0, 10 - abs(metrics['avg_word_length'] - optimal_word_length) * 2)
531
+ complexity_score = max(0, 10 - abs(metrics['complex_words_percentage'] - optimal_complex_words_percentage) * 0.3)
532
+
533
+ # Weighted average
534
+ overall_score = (sentence_score * 0.4 + word_length_score * 0.3 + complexity_score * 0.3)
535
+
536
+ return round(overall_score, 1)
537
+
538
+ except Exception:
539
+ return 5.0 # Default neutral score
540
+
541
+ def _generate_readability_recommendations(self, metrics: Dict[str, float]) -> List[str]:
542
+ """Generate specific readability improvement recommendations"""
543
+ recommendations = []
544
+
545
+ try:
546
+ if metrics['avg_words_per_sentence'] > 20:
547
+ recommendations.append("Break down long sentences for better AI processing")
548
+ elif metrics['avg_words_per_sentence'] < 8:
549
+ recommendations.append("Consider combining very short sentences for better context")
550
+
551
+ if metrics['long_sentences_percentage'] > 30:
552
+ recommendations.append("Reduce the number of complex sentences (>20 words)")
553
+
554
+ if metrics['complex_words_percentage'] > 25:
555
+ recommendations.append("Simplify vocabulary where possible for broader accessibility")
556
+ elif metrics['complex_words_percentage'] < 5:
557
+ recommendations.append("Add more specific terminology to establish authority")
558
+
559
+ return recommendations
560
+
561
+ except Exception:
562
+ return ["Unable to generate specific recommendations"]