Spaces:

MBilal-72
/

GenerativeEngineOptimization

Runtime error

App Files Community

MBilal-72 committed on Jul 26, 2025

Commit

68b0980

verified ·

1 Parent(s): 18aaeb9

update app.py after utils

Browse files

Files changed (1) hide show

app.py +459 -506

app.py CHANGED Viewed

@@ -1,555 +1,508 @@
 import os
 import tempfile
-import streamlit as st
 import json
-import requests
-from bs4 import BeautifulSoup
-from urllib.parse import urljoin, urlparse
-import time
-from typing import List, Dict, Any
-import pandas as pd
-from langchain_community.document_loaders import PyPDFLoader
-from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain.chains import RetrievalQA
-from langchain.prompts import PromptTemplate, ChatPromptTemplate
-from langchain.schema import Document
-from langchain_groq import ChatGroq
-# --- Environment Variables ---
-GROQ_API_KEY = os.getenv("GROQ_API_KEY", "your-groq-api-key")
-HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")
-# --- Initialize Groq LLM ---
-llm = ChatGroq(
-    api_key=GROQ_API_KEY,
-    model_name="llama3-8b-8192",
-    temperature=0.1
-)
-# --- HuggingFace Embeddings ---
-embedding = HuggingFaceEmbeddings(
-    model_name="sentence-transformers/all-MiniLM-L6-v2",
-    cache_folder="./hf_cache",
-)
-# --- System Prompt for Content Enhancement ---
-system_prompt = """You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.
-Evaluate the input text based on the following criteria, assigning a score from 1–10 for each:
-Clarity: How easily can the content be understood?
-Structuredness: How well-organized and coherent is the content?
-LLM Answerability: How easily can an LLM extract precise answers from the content?
-Identify the most salient keywords.
-Rewrite the text to improve:
-Clarity and precision
-Logical structure and flow
-Suitability for LLM-based information retrieval
-Present your analysis and optimized text in the following JSON format:
-```json
-{
-"score": {
-"clarity": 8.5,
-"structuredness": 7.0,
-"answerability": 9.0
-},
-"keywords": ["example", "installation", "setup"],
-"optimized_text": "..."
-}
-```"""
-# --- GEO Analysis System Prompt ---
-geo_analysis_prompt = """You are a Generative Engine Optimizer (GEO) specialist. Analyze the provided website content for its effectiveness in AI-powered search engines and LLM systems.
-Evaluate the content based on these GEO criteria (score 1-10 each):
-1. **AI Search Visibility**: How likely is this content to be surfaced by AI search engines?
-2. **Query Intent Matching**: How well does the content match common user queries?
-3. **Factual Accuracy & Authority**: How trustworthy and authoritative is the information?
-4. **Conversational Readiness**: How suitable is the content for AI chat responses?
-5. **Semantic Richness**: How well does the content use relevant semantic keywords?
-6. **Context Completeness**: Does the content provide complete, self-contained answers?
-7. **Citation Worthiness**: How likely are AI systems to cite this content?
-8. **Multi-Query Coverage**: Does the content answer multiple related questions?
-Also identify:
-- Primary topics and entities
-- Missing information gaps
-- Optimization opportunities
-- Specific enhancement recommendations
-Format your response as JSON:
-```json
-{
-  "geo_scores": {
-    "ai_search_visibility": 7.5,
-    "query_intent_matching": 8.0,
-    "factual_accuracy": 9.0,
-    "conversational_readiness": 6.5,
-    "semantic_richness": 7.0,
-    "context_completeness": 8.5,
-    "citation_worthiness": 7.8,
-    "multi_query_coverage": 6.0
-  },
-  "overall_geo_score": 7.5,
-  "primary_topics": ["topic1", "topic2"],
-  "entities": ["entity1", "entity2"],
-  "missing_gaps": ["gap1", "gap2"],
-  "optimization_opportunities": [
-    {
-      "type": "semantic_enhancement",
-      "description": "Add more related terms",
-      "priority": "high"
-    }
-  ],
-  "recommendations": [
-    "Specific actionable recommendation 1",
-    "Specific actionable recommendation 2"
-  ]
-}
-```"""
-# --- Website Scraping Functions ---
-def extract_website_content(url: str, max_pages: int = 5) -> List[Dict[str, Any]]:
-    """Extract content from website pages"""
-    try:
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-        }
-        response = requests.get(url, headers=headers, timeout=10)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.content, 'html.parser')
-        # Remove script and style elements
-        for script in soup(["script", "style", "nav", "footer", "header"]):
-            script.decompose()
-        # Extract main content
-        main_content = soup.find('main') or soup.find('article') or soup.find('div', class_='content') or soup.body
-        if main_content:
-            text_content = main_content.get_text(separator=' ', strip=True)
-        else:
-            text_content = soup.get_text(separator=' ', strip=True)
-        # Clean up text
-        lines = [line.strip() for line in text_content.split('\n') if line.strip()]
-        cleaned_text = ' '.join(lines)
-        # Extract metadata
-        title = soup.find('title').get_text() if soup.find('title') else "No Title"
-        meta_desc = soup.find('meta', attrs={'name': 'description'})
-        description = meta_desc.get('content') if meta_desc else "No Description"
-        # Extract headings
-        headings = []
-        for i in range(1, 7):
-            for heading in soup.find_all(f'h{i}'):
-                headings.append({
-                    'level': i,
-                    'text': heading.get_text(strip=True)
-                })
-        return [{
-            'url': url,
-            'title': title,
-            'description': description,
-            'content': cleaned_text[:10000],  # Limit content length
-            'headings': headings,
-            'word_count': len(cleaned_text.split())
-        }]
-    except Exception as e:
-        st.error(f"Error scraping {url}: {str(e)}")
-        return []
-def analyze_page_geo_score(content: str, title: str, llm) -> Dict[str, Any]:
-    """Analyze a single page for GEO score"""
-    try:
-        geo_prompt = ChatPromptTemplate.from_messages([
-            ("system", geo_analysis_prompt),
-            ("user", f"Title: {title}\n\nContent: {content}")
         ])
-        chain = geo_prompt | llm
-        result = chain.invoke({"input": f"Title: {title}\n\nContent: {content}"})
-        result_content = result.content if hasattr(result, 'content') else str(result)
-        # Extract JSON from response
-        json_start = result_content.find('{')
-        json_end = result_content.rfind('}') + 1
-        if json_start != -1 and json_end != -1:
-            json_str = result_content[json_start:json_end]
-            return json.loads(json_str)
-        else:
-            return {"error": "Could not parse GEO analysis"}
-    except Exception as e:
-        return {"error": f"Analysis failed: {str(e)}"}
-# --- Create Chat Prompt Template for Content Enhancement ---
-enhancement_prompt = ChatPromptTemplate.from_messages([
-    ("system", system_prompt),
-    ("user", "{input}")
-])
-# --- Streamlit UI ---
-st.set_page_config(page_title="AI Content Optimizer", page_icon="🚀", layout="wide")
-st.title("🚀 AI Content Optimizer & GEO Analyzer")
-# Sidebar
-st.sidebar.title("🛠️ Tools")
-st.sidebar.markdown("- 📄 Document Q&A")
-st.sidebar.markdown("- 🔧 Content Enhancement")
-st.sidebar.markdown("- 🌐 Website GEO Analysis")
-st.sidebar.markdown("- 📊 SEO-like Scoring")
-# Create tabs
-tab1, tab2, tab3 = st.tabs(["📄 Document Chat", "🔧 Content Enhancement", "🌐 Website GEO Analysis"])
-with tab1:
-    st.header("Document Question Answering")
-    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
-    pasted_text = st.text_area("Or paste some text below:", height=150)
-    user_query = st.text_input("Ask a question about the content")
-    submit_qa_button = st.button("Submit Question", key="qa_submit")
-    if submit_qa_button:
-        if not user_query.strip():
-            st.warning("Please enter a question.")
-            st.stop()
-        documents = []
-        if uploaded_file:
-            with st.spinner("Processing PDF..."):
-                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
-                    tmp_file.write(uploaded_file.read())
-                    tmp_path = tmp_file.name
-                loader = PyPDFLoader(tmp_path)
-                documents = loader.load_and_split()
-                os.unlink(tmp_path)
-        elif pasted_text.strip():
-            documents = [Document(page_content=pasted_text)]
-        else:
-            st.warning("Please upload a PDF or paste some text.")
-            st.stop()
-        with st.spinner("Creating embeddings..."):
-            vectorstore = FAISS.from_documents(documents, embedding)
-            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
-        qa_prompt_template = PromptTemplate(
-            input_variables=["context", "question"],
-            template="""You are an AI assistant. Use the following context to answer the question.
-            Be concise, accurate, and helpful. If the answer is not in the context, say so.
-            Context: {context}
-            Question: {question}
-            Answer:"""
-        )
-        qa_chain = RetrievalQA.from_chain_type(
-            llm=llm,
-            chain_type="stuff",
-            retriever=retriever,
-            return_source_documents=True,
-            chain_type_kwargs={"prompt": qa_prompt_template}
-        )
-        with st.spinner("Generating answer..."):
             try:
-                result = qa_chain({"query": user_query})
                 st.markdown("### 💬 Answer")
                 st.write(result["result"])
                 with st.expander("📄 Source Documents"):
-                    for i, doc in enumerate(result["source_documents"]):
                         st.write(f"**Source {i+1}:**")
-                        st.write(doc.page_content[:500] + "..." if len(doc.page_content) > 500 else doc.page_content)
                         if hasattr(doc, 'metadata') and doc.metadata:
                             st.write(f"*Metadata: {doc.metadata}*")
                         st.write("---")
             except Exception as e:
                 st.error(f"An error occurred: {str(e)}")
-with tab2:
-    st.header("Content Enhancement Analysis")
-    enhancement_text = st.text_area("Enter text to analyze and enhance:", height=200, key="enhancement_input")
-    submit_enhancement_button = st.button("Analyze & Enhance", key="enhancement_submit")
-    if submit_enhancement_button:
-        if not enhancement_text.strip():
-            st.warning("Please enter some text to analyze.")
-            st.stop()
-        with st.spinner("Analyzing content..."):
             try:
-                enhancement_chain = enhancement_prompt | llm
-                result = enhancement_chain.invoke({"input": enhancement_text})
-                result_content = result.content if hasattr(result, 'content') else str(result)
                 st.markdown("### 📊 Analysis Results")
-                try:
-                    json_start = result_content.find('{')
-                    json_end = result_content.rfind('}') + 1
-                    if json_start != -1 and json_end != -1:
-                        json_str = result_content[json_start:json_end]
-                        analysis_data = json.loads(json_str)
-                        st.markdown("#### Scores (1-10)")
-                        col1, col2, col3 = st.columns(3)
-                        with col1:
-                            clarity_score = analysis_data.get('score', {}).get('clarity', 'N/A')
-                            st.metric("Clarity", clarity_score)
-                        with col2:
-                            struct_score = analysis_data.get('score', {}).get('structuredness', 'N/A')
-                            st.metric("Structure", struct_score)
-                        with col3:
-                            answer_score = analysis_data.get('score', {}).get('answerability', 'N/A')
-                            st.metric("Answerability", answer_score)
-                        keywords = analysis_data.get('keywords', [])
-                        if keywords:
-                            st.markdown("#### 🔑 Key Terms")
-                            st.write(", ".join(keywords))
-                        optimized_text = analysis_data.get('optimized_text', '')
-                        if optimized_text:
-                            st.markdown("#### ✨ Optimized Content")
-                            st.text_area("Enhanced version:", value=optimized_text, height=200, key="optimized_output")
-                    else:
-                        st.markdown("#### Analysis Response")
-                        st.write(result_content)
-                except json.JSONDecodeError:
-                    st.markdown("#### Analysis Response")
-                    st.write(result_content)
             except Exception as e:
-                st.error(f"An error occurred during enhancement: {str(e)}")
-with tab3:
-    st.header("🌐 Website GEO Analysis")
-    st.markdown("Analyze any website for Generative Engine Optimization (GEO) - how well it performs with AI search engines.")
-    col1, col2 = st.columns([2, 1])
-    with col1:
-        website_url = st.text_input("Enter website URL:", placeholder="https://example.com")
-    with col2:
-        max_pages = st.selectbox("Pages to analyze:", [1, 3, 5], index=0)
-    analyze_website_button = st.button("🔍 Analyze Website", key="website_analyze")
-    if analyze_website_button:
-        if not website_url.strip():
-            st.warning("Please enter a website URL.")
-            st.stop()
-        # Add https:// if not present
-        if not website_url.startswith(('http://', 'https://')):
-            website_url = 'https://' + website_url
-        with st.spinner(f"Analyzing website: {website_url}"):
             try:
-                # Extract website content
-                pages_data = extract_website_content(website_url, max_pages)
-                if not pages_data:
-                    st.error("Could not extract content from the website.")
-                    st.stop()
-                st.success(f"Successfully extracted content from {len(pages_data)} page(s)")
-                # Analyze each page
-                all_analyses = []
-                for i, page_data in enumerate(pages_data):
-                    with st.spinner(f"Analyzing page {i+1}/{len(pages_data)}..."):
-                        analysis = analyze_page_geo_score(
-                            page_data['content'],
-                            page_data['title'],
-                            llm
-                        )
-                        if 'error' not in analysis:
-                            analysis['page_data'] = page_data
-                            all_analyses.append(analysis)
-                        else:
-                            st.warning(f"Could not analyze page {i+1}: {analysis['error']}")
-                if all_analyses:
-                    # Display overall results
-                    st.markdown("## 📊 GEO Analysis Results")
-                    # Calculate average scores
-                    avg_scores = {}
-                    score_keys = list(all_analyses[0].get('geo_scores', {}).keys())
-                    for key in score_keys:
-                        scores = [analysis['geo_scores'][key] for analysis in all_analyses if 'geo_scores' in analysis]
-                        avg_scores[key] = sum(scores) / len(scores) if scores else 0
-                    overall_avg = sum(avg_scores.values()) / len(avg_scores) if avg_scores else 0
-                    # Display metrics
-                    st.markdown("### 🎯 Overall GEO Scores")
-                    # Main score
-                    col1, col2, col3 = st.columns([1, 2, 1])
-                    with col2:
-                        st.metric("Overall GEO Score", f"{overall_avg:.1f}/10",
-                                 delta=f"{overall_avg - 7.0:.1f}" if overall_avg >= 7.0 else f"{overall_avg - 7.0:.1f}")
-                    # Individual scores
-                    st.markdown("### 📈 Detailed Metrics")
-                    col1, col2, col3, col4 = st.columns(4)
-                    metrics_display = [
-                        ("AI Search Visibility", "ai_search_visibility"),
-                        ("Query Intent Match", "query_intent_matching"),
-                        ("Factual Accuracy", "factual_accuracy"),
-                        ("Conversational Ready", "conversational_readiness")
-                    ]
-                    for i, (display_name, key) in enumerate(metrics_display):
-                        with [col1, col2, col3, col4][i]:
-                            score = avg_scores.get(key, 0)
-                            st.metric(display_name, f"{score:.1f}")
-                    col1, col2, col3, col4 = st.columns(4)
-                    metrics_display_2 = [
-                        ("Semantic Richness", "semantic_richness"),
-                        ("Context Complete", "context_completeness"),
-                        ("Citation Worthy", "citation_worthiness"),
-                        ("Multi-Query Cover", "multi_query_coverage")
-                    ]
-                    for i, (display_name, key) in enumerate(metrics_display_2):
-                        with [col1, col2, col3, col4][i]:
-                            score = avg_scores.get(key, 0)
-                            st.metric(display_name, f"{score:.1f}")
-                    # Recommendations
-                    st.markdown("### 💡 Optimization Recommendations")
-                    all_recommendations = []
-                    all_opportunities = []
-                    for analysis in all_analyses:
-                        all_recommendations.extend(analysis.get('recommendations', []))
-                        all_opportunities.extend(analysis.get('optimization_opportunities', []))
-                    # Remove duplicates
-                    unique_recommendations = list(set(all_recommendations))
-                    for i, rec in enumerate(unique_recommendations[:5], 1):
-                        st.write(f"**{i}.** {rec}")
-                    # Opportunities by priority
-                    if all_opportunities:
-                        st.markdown("### 🚀 Priority Optimizations")
-                        high_priority = [opp for opp in all_opportunities if opp.get('priority') == 'high']
-                        medium_priority = [opp for opp in all_opportunities if opp.get('priority') == 'medium']
-                        if high_priority:
-                            st.markdown("#### 🔴 High Priority")
-                            for opp in high_priority[:3]:
-                                st.write(f"**{opp.get('type', 'Optimization')}**: {opp.get('description', 'No description')}")
-                        if medium_priority:
-                            st.markdown("#### 🟡 Medium Priority")
-                            for opp in medium_priority[:3]:
-                                st.write(f"**{opp.get('type', 'Optimization')}**: {opp.get('description', 'No description')}")
-                    # Detailed page analysis
-                    with st.expander("📋 Detailed Page Analysis"):
-                        for i, analysis in enumerate(all_analyses):
-                            page_data = analysis.get('page_data', {})
-                            st.markdown(f"#### Page {i+1}: {page_data.get('title', 'Unknown Title')}")
-                            st.write(f"**URL**: {page_data.get('url', 'Unknown')}")
-                            st.write(f"**Word Count**: {page_data.get('word_count', 0)}")
-                            if 'primary_topics' in analysis:
-                                st.write(f"**Topics**: {', '.join(analysis['primary_topics'])}")
-                            if 'entities' in analysis:
-                                st.write(f"**Entities**: {', '.join(analysis['entities'])}")
-                            st.write("---")
-                    # Export functionality
-                    st.markdown("### 📥 Export Results")
-                    if st.button("📊 Generate Report"):
-                        report_data = {
-                            'website_url': website_url,
-                            'analysis_date': time.strftime('%Y-%m-%d %H:%M:%S'),
-                            'overall_score': overall_avg,
-                            'individual_scores': avg_scores,
-                            'recommendations': unique_recommendations,
-                            'pages_analyzed': len(all_analyses)
-                        }
-                        st.json(report_data)
-                        st.success("Report generated! You can copy the JSON above for your records.")
-                else:
                     st.error("Could not analyze any pages from the website.")
             except Exception as e:
                 st.error(f"An error occurred during website analysis: {str(e)}")
-# --- Sidebar Information ---
-with st.sidebar:
-    st.markdown("---")
-    st.markdown("### 🔧 Configuration")
-    st.markdown("Set your API keys:")
-    st.code("export GROQ_API_KEY='your-key'")
-    st.markdown("---")
-    st.markdown("### 📖 GEO Metrics Explained")
-    st.markdown("**AI Search Visibility**: Likelihood of appearing in AI search results")
-    st.markdown("**Query Intent Matching**: How well content matches user queries")
-    st.markdown("**Conversational Readiness**: Suitability for AI chat responses")
-    st.markdown("**Citation Worthiness**: Probability of being cited by AI")
-    st.markdown("---")
-    st.markdown("### ℹ️ About")
-    st.markdown("This tool analyzes websites for:")
-    st.markdown("- 🤖 AI search optimization")
-    st.markdown("- 💬 LLM compatibility")
-    st.markdown("- 📊 GEO scoring")
-    st.markdown("- 🎯 Content recommendations")
-st.markdown("---")
-st.markdown("*🚀 AI Content Optimizer - Built with Streamlit, LangChain, and Groq*")

+"""
+Main Streamlit Application - GEO SEO AI Optimizer
+Entry point for the application with UI components
+"""
+import streamlit as st
 import os
 import tempfile
 import json
+from typing import Dict, Any, List
+# Import our custom modules
+from utils.parser import PDFParser, TextParser, WebpageParser
+from utils.scorer import GEOScorer
+from utils.optimizer import ContentOptimizer
+from utils.chunker import VectorChunker
+from utils.export import ResultExporter
+# Import LangChain components
+from langchain_groq import ChatGroq
+from langchain_community.embeddings import HuggingFaceEmbeddings
+class GEOSEOApp:
+    """Main application class that orchestrates all components"""
+    def __init__(self):
+        self.setup_config()
+        self.setup_models()
+        self.setup_parsers()
+        self.setup_components()
+    def setup_config(self):
+        """Initialize configuration and API keys"""
+        self.groq_api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")
+        self.hf_api_key = os.getenv("HUGGINGFACE_API_KEY", "your-huggingface-api-key")
+        # Create data directory if it doesn't exist
+        os.makedirs("data/uploaded_files", exist_ok=True)
+    def setup_models(self):
+        """Initialize LLM and embedding models"""
+        self.llm = ChatGroq(
+            api_key=self.groq_api_key,
+            model_name="llama3-8b-8192",
+            temperature=0.1
+        )
+        self.embeddings = HuggingFaceEmbeddings(
+            model_name="sentence-transformers/all-MiniLM-L6-v2",
+            cache_folder="./hf_cache",
+        )
+    def setup_parsers(self):
+        """Initialize content parsers"""
+        self.pdf_parser = PDFParser()
+        self.text_parser = TextParser()
+        self.webpage_parser = WebpageParser()
+    def setup_components(self):
+        """Initialize processing components"""
+        self.geo_scorer = GEOScorer(self.llm)
+        self.content_optimizer = ContentOptimizer(self.llm)
+        self.vector_chunker = VectorChunker(self.embeddings)
+        self.result_exporter = ResultExporter()
+    def run(self):
+        """Main application runner"""
+        st.set_page_config(
+            page_title="GEO SEO AI Optimizer",
+            page_icon="🚀",
+            layout="wide"
+        )
+        st.title("🚀 GEO SEO AI Optimizer")
+        st.markdown("*Optimize your content for AI search engines and LLM systems*")
+        # Sidebar
+        self.render_sidebar()
+        # Main tabs
+        tab1, tab2, tab3 = st.tabs([
+            "📄 Document Q&A",
+            "🔧 Content Enhancement",
+            "🌐 Website GEO Analysis"
         ])
+        with tab1:
+            self.render_document_qa_tab()
+        with tab2:
+            self.render_content_enhancement_tab()
+        with tab3:
+            self.render_website_analysis_tab()
+    def render_sidebar(self):
+        """Render sidebar with information and controls"""
+        st.sidebar.title("🛠️ GEO Tools")
+        st.sidebar.markdown("- 📄 Document Q&A with RAG")
+        st.sidebar.markdown("- 🔧 Content Enhancement")
+        st.sidebar.markdown("- 🌐 Website GEO Analysis")
+        st.sidebar.markdown("- 📊 AI-First SEO Scoring")
+        st.sidebar.markdown("---")
+        st.sidebar.markdown("### 🔧 Configuration")
+        st.sidebar.markdown("Set your API keys:")
+        st.sidebar.code("export GROQ_API_KEY='your-key'")
+        st.sidebar.markdown("---")
+        st.sidebar.markdown("### 📖 GEO Metrics")
+        st.sidebar.markdown("**AI Search Visibility**: How likely AI engines will surface your content")
+        st.sidebar.markdown("**Query Intent Matching**: How well content matches user queries")
+        st.sidebar.markdown("**Conversational Readiness**: Suitability for AI chat responses")
+        st.sidebar.markdown("**Citation Worthiness**: Probability of being cited by AI")
+        st.sidebar.markdown("---")
+        st.sidebar.markdown("### ℹ️ Components")
+        st.sidebar.markdown("- **Parser**: Extract content from various sources")
+        st.sidebar.markdown("- **Scorer**: Analyze GEO performance")
+        st.sidebar.markdown("- **Optimizer**: Enhance content for AI")
+        st.sidebar.markdown("- **Chunker**: Create vector embeddings")
+        st.sidebar.markdown("- **Exporter**: Generate reports")
+    def render_document_qa_tab(self):
+        """Render Document Q&A tab"""
+        st.header("📄 Document Question Answering")
+        st.markdown("Upload documents or paste text to ask questions using RAG.")
+        # File upload
+        uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
+        # Text input
+        pasted_text = st.text_area("Or paste text directly:", height=150)
+        # Question input
+        user_query = st.text_input("Ask a question about the content:")
+        # Submit button
+        if st.button("🔍 Ask Question", key="qa_submit"):
+            if not user_query.strip():
+                st.warning("Please enter a question.")
+                return
             try:
+                # Parse content
+                documents = []
+                if uploaded_file:
+                    with st.spinner("Processing PDF..."):
+                        # Save uploaded file temporarily
+                        temp_path = self.save_uploaded_file(uploaded_file)
+                        documents = self.pdf_parser.parse(temp_path)
+                        os.unlink(temp_path)  # Clean up
+                elif pasted_text.strip():
+                    with st.spinner("Processing text..."):
+                        documents = self.text_parser.parse(pasted_text)
+                else:
+                    st.warning("Please upload a PDF or paste some text.")
+                    return
+                # Create vector store and answer question
+                with st.spinner("Creating embeddings and searching..."):
+                    qa_chain = self.vector_chunker.create_qa_chain(documents, self.llm)
+                    result = qa_chain({"query": user_query})
+                # Display results
                 st.markdown("### 💬 Answer")
                 st.write(result["result"])
+                # Show sources
                 with st.expander("📄 Source Documents"):
+                    for i, doc in enumerate(result.get("source_documents", [])):
                         st.write(f"**Source {i+1}:**")
+                        content = doc.page_content
+                        st.write(content[:500] + "..." if len(content) > 500 else content)
                         if hasattr(doc, 'metadata') and doc.metadata:
                             st.write(f"*Metadata: {doc.metadata}*")
                         st.write("---")
             except Exception as e:
                 st.error(f"An error occurred: {str(e)}")
+    def render_content_enhancement_tab(self):
+        """Render Content Enhancement tab"""
+        st.header("🔧 Content Enhancement")
+        st.markdown("Analyze and optimize your content for better AI/LLM performance.")
+        # Content input
+        input_text = st.text_area(
+            "Enter content to analyze and enhance:",
+            height=200,
+            key="enhancement_input"
+        )
+        # Analysis options
+        col1, col2 = st.columns(2)
+        with col1:
+            analyze_only = st.checkbox("Analysis only (no rewriting)", value=False)
+        with col2:
+            include_keywords = st.checkbox("Include keyword suggestions", value=True)
+        # Submit button
+        if st.button("🔧 Analyze & Enhance", key="enhancement_submit"):
+            if not input_text.strip():
+                st.warning("Please enter some content to analyze.")
+                return
             try:
+                with st.spinner("Analyzing content..."):
+                    # Run content analysis and optimization
+                    result = self.content_optimizer.optimize_content(
+                        input_text,
+                        analyze_only=analyze_only,
+                        include_keywords=include_keywords
+                    )
+                if result.get("error"):
+                    st.error(f"Analysis failed: {result['error']}")
+                    return
+                # Display results
                 st.markdown("### 📊 Analysis Results")
+                # Show scores
+                scores = result.get("scores", {})
+                if scores:
+                    col1, col2, col3 = st.columns(3)
+                    with col1:
+                        clarity = scores.get("clarity", 0)
+                        st.metric("Clarity", f"{clarity}/10")
+                    with col2:
+                        structure = scores.get("structuredness", 0)
+                        st.metric("Structure", f"{structure}/10")
+                    with col3:
+                        answerability = scores.get("answerability", 0)
+                        st.metric("Answerability", f"{answerability}/10")
+                # Show keywords
+                keywords = result.get("keywords", [])
+                if keywords:
+                    st.markdown("#### 🔑 Key Terms")
+                    st.write(", ".join(keywords))
+                # Show optimized content
+                optimized_text = result.get("optimized_text", "")
+                if optimized_text and not analyze_only:
+                    st.markdown("#### ✨ Optimized Content")
+                    st.text_area(
+                        "Enhanced version:",
+                        value=optimized_text,
+                        height=200,
+                        key="optimized_output"
+                    )
+                # Export option
+                if st.button("📥 Export Results"):
+                    export_data = self.result_exporter.export_enhancement_results(result)
+                    st.download_button(
+                        label="Download Analysis Report",
+                        data=json.dumps(export_data, indent=2),
+                        file_name=f"content_analysis_{int(time.time())}.json",
+                        mime="application/json"
+                    )
             except Exception as e:
+                st.error(f"An error occurred: {str(e)}")
+    def render_website_analysis_tab(self):
+        """Render Website GEO Analysis tab"""
+        st.header("🌐 Website GEO Analysis")
+        st.markdown("Analyze websites for Generative Engine Optimization (GEO) performance.")
+        # URL input
+        col1, col2 = st.columns([3, 1])
+        with col1:
+            website_url = st.text_input(
+                "Enter website URL:",
+                placeholder="https://example.com"
+            )
+        with col2:
+            max_pages = st.selectbox("Pages to analyze:", [1, 3, 5], index=0)
+        # Analysis options
+        col1, col2 = st.columns(2)
+        with col1:
+            include_subpages = st.checkbox("Include subpages", value=False)
+        with col2:
+            detailed_analysis = st.checkbox("Detailed analysis", value=True)
+        # Submit button
+        if st.button("🌐 Analyze Website", key="website_analyze"):
+            if not website_url.strip():
+                st.warning("Please enter a website URL.")
+                return
             try:
+                # Normalize URL
+                if not website_url.startswith(('http://', 'https://')):
+                    website_url = 'https://' + website_url
+                with st.spinner(f"Analyzing website: {website_url}"):
+                    # Parse website content
+                    pages_data = self.webpage_parser.parse_website(
+                        website_url,
+                        max_pages=max_pages,
+                        include_subpages=include_subpages
+                    )
+                    if not pages_data:
+                        st.error("Could not extract content from the website.")
+                        return
+                    st.success(f"Successfully extracted content from {len(pages_data)} page(s)")
+                # Analyze GEO scores
+                with st.spinner("Calculating GEO scores..."):
+                    geo_results = []
+                    for i, page_data in enumerate(pages_data):
+                        with st.spinner(f"Analyzing page {i+1}/{len(pages_data)}..."):
+                            analysis = self.geo_scorer.analyze_page_geo(
+                                page_data['content'],
+                                page_data['title'],
+                                detailed=detailed_analysis
+                            )
+                            if not analysis.get('error'):
+                                analysis['page_data'] = page_data
+                                geo_results.append(analysis)
+                            else:
+                                st.warning(f"Could not analyze page {i+1}: {analysis['error']}")
+                if not geo_results:
                     st.error("Could not analyze any pages from the website.")
+                    return
+                # Display results
+                self.display_geo_results(geo_results, website_url)
+                # Export functionality
+                st.markdown("### 📥 Export Results")
+                if st.button("📊 Generate Full Report"):
+                    report_data = self.result_exporter.export_geo_results(
+                        geo_results,
+                        website_url
+                    )
+                    st.download_button(
+                        label="Download GEO Report",
+                        data=json.dumps(report_data, indent=2),
+                        file_name=f"geo_analysis_{website_url.replace('https://', '').replace('/', '_')}.json",
+                        mime="application/json"
+                    )
             except Exception as e:
                 st.error(f"An error occurred during website analysis: {str(e)}")
+    def display_geo_results(self, geo_results: List[Dict], website_url: str):
+        """Display GEO analysis results"""
+        st.markdown("## 📊 GEO Analysis Results")
+        # Calculate average scores
+        avg_scores = self.calculate_average_scores(geo_results)
+        overall_avg = sum(avg_scores.values()) / len(avg_scores) if avg_scores else 0
+        # Main score display
+        col1, col2, col3 = st.columns([1, 2, 1])
+        with col2:
+            st.metric(
+                "Overall GEO Score",
+                f"{overall_avg:.1f}/10",
+                delta=f"{overall_avg - 7.0:.1f}" if overall_avg != 7.0 else None
+            )
+        # Individual metrics
+        st.markdown("### 📈 Detailed GEO Metrics")
+        # First row of metrics
+        col1, col2, col3, col4 = st.columns(4)
+        metrics_row1 = [
+            ("AI Search Visibility", "ai_search_visibility"),
+            ("Query Intent Match", "query_intent_matching"),
+            ("Factual Accuracy", "factual_accuracy"),
+            ("Conversational Ready", "conversational_readiness")
+        ]
+        for i, (display_name, key) in enumerate(metrics_row1):
+            with [col1, col2, col3, col4][i]:
+                score = avg_scores.get(key, 0)
+                st.metric(display_name, f"{score:.1f}")
+        # Second row of metrics
+        col1, col2, col3, col4 = st.columns(4)
+        metrics_row2 = [
+            ("Semantic Richness", "semantic_richness"),
+            ("Context Complete", "context_completeness"),
+            ("Citation Worthy", "citation_worthiness"),
+            ("Multi-Query Cover", "multi_query_coverage")
+        ]
+        for i, (display_name, key) in enumerate(metrics_row2):
+            with [col1, col2, col3, col4][i]:
+                score = avg_scores.get(key, 0)
+                st.metric(display_name, f"{score:.1f}")
+        # Recommendations
+        self.display_recommendations(geo_results)
+        # Detailed page analysis
+        with st.expander("📋 Detailed Page Analysis"):
+            for i, analysis in enumerate(geo_results):
+                page_data = analysis.get('page_data', {})
+                st.markdown(f"#### Page {i+1}: {page_data.get('title', 'Unknown Title')}")
+                st.write(f"**URL**: {page_data.get('url', 'Unknown')}")
+                st.write(f"**Word Count**: {page_data.get('word_count', 0)}")
+                # Show topics and entities if available
+                if 'primary_topics' in analysis:
+                    st.write(f"**Topics**: {', '.join(analysis['primary_topics'])}")
+                if 'entities' in analysis:
+                    st.write(f"**Entities**: {', '.join(analysis['entities'])}")
+                # Show page-specific scores
+                if 'geo_scores' in analysis:
+                    scores = analysis['geo_scores']
+                    score_text = ", ".join([f"{k}: {v:.1f}" for k, v in scores.items()])
+                    st.write(f"**Scores**: {score_text}")
+                st.write("---")
+    def display_recommendations(self, geo_results: List[Dict]):
+        """Display optimization recommendations"""
+        st.markdown("### 💡 Optimization Recommendations")
+        # Collect all recommendations
+        all_recommendations = []
+        all_opportunities = []
+        for analysis in geo_results:
+            all_recommendations.extend(analysis.get('recommendations', []))
+            all_opportunities.extend(analysis.get('optimization_opportunities', []))
+        # Remove duplicates and display
+        unique_recommendations = list(set(all_recommendations))
+        if unique_recommendations:
+            for i, rec in enumerate(unique_recommendations[:5], 1):
+                st.write(f"**{i}.** {rec}")
+        # Priority opportunities
+        if all_opportunities:
+            st.markdown("#### 🚀 Priority Optimizations")
+            high_priority = [opp for opp in all_opportunities if opp.get('priority') == 'high']
+            medium_priority = [opp for opp in all_opportunities if opp.get('priority') == 'medium']
+            if high_priority:
+                st.markdown("##### 🔴 High Priority")
+                for opp in high_priority[:3]:
+                    st.write(f"**{opp.get('type', 'Optimization')}**: {opp.get('description', 'No description')}")
+            if medium_priority:
+                st.markdown("##### 🟡 Medium Priority")
+                for opp in medium_priority[:3]:
+                    st.write(f"**{opp.get('type', 'Optimization')}**: {opp.get('description', 'No description')}")
+    def calculate_average_scores(self, geo_results: List[Dict]) -> Dict[str, float]:
+        """Calculate average GEO scores across all pages"""
+        if not geo_results:
+            return {}
+        # Get all score keys from the first result
+        score_keys = list(geo_results[0].get('geo_scores', {}).keys())
+        avg_scores = {}
+        for key in score_keys:
+            scores = [
+                result['geo_scores'][key]
+                for result in geo_results
+                if 'geo_scores' in result and key in result['geo_scores']
+            ]
+            avg_scores[key] = sum(scores) / len(scores) if scores else 0
+        return avg_scores
+    def save_uploaded_file(self, uploaded_file) -> str:
+        """Save uploaded file to temporary location"""
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+            tmp_file.write(uploaded_file.read())
+            return tmp_file.name
+def main():
+    """Main entry point"""
+    app = GEOSEOApp()
+    app.run()
 # Start the app only when this module's __name__ is "__main__".
+if __name__ == "__main__":
+    main()