# Professional AI Content Analyzer - Gradio App for Hugging Face
# Generates comprehensive content analysis reports with professional formatting

import gradio as gr
import requests
import json
import nltk
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from datetime import datetime
import os

# Download required NLTK data
# Make sure the sentence-tokenizer data is available before the app starts.
# Recent NLTK releases (3.9+) load the tokenizer from the 'punkt_tab' resource
# rather than the older pickled 'punkt' models, so both are checked; fetching
# only 'punkt' would leave sent_tokenize() raising LookupError at runtime.
for _resource in ('punkt', 'punkt_tab'):
    try:
        nltk.data.find(f'tokenizers/{_resource}')
    except LookupError:
        # Best effort: nltk.download reports failure via its return value,
        # and split_content() has its own fallback if the data is missing.
        nltk.download(_resource, quiet=True)

# ============================================================================
# PROFESSIONAL CONTENT ANALYZER CLASS
# ============================================================================

class ProfessionalContentAnalyzer:
    def __init__(self):
        """Initialise the analyzer and immediately try to load the embedding model."""
        # Start from "no model"; _load_model() replaces this when loading succeeds.
        self.model = None
        self._load_model()
        
    def _load_model(self):
        """Instantiate the sentence-embedding model, tolerating failure.

        On success ``self.model`` holds the loaded model; on any error a
        warning is printed and ``self.model`` is set to ``None``.
        """
        model_name = 'all-MiniLM-L6-v2'
        try:
            loaded = SentenceTransformer(model_name)
        except Exception as exc:
            print(f"Warning: Could not load embedding model: {exc}")
            loaded = None
        self.model = loaded
    
    def split_content(self, content, chunk_size=500):
        """Split content into sentence-aligned chunks.

        Sentences are packed greedily: a sentence is added to the current
        chunk while the joined text stays shorter than ``chunk_size``
        characters (joining spaces included). A single sentence that is
        itself longer than ``chunk_size`` becomes its own chunk.

        Args:
            content: Text to split. ``None``, empty or whitespace-only input
                yields an empty list.
            chunk_size: Upper bound (exclusive) on the length of a
                multi-sentence chunk.

        Returns:
            A list of non-empty chunk strings in document order.
        """
        import re  # local import: only the fallback splitter needs it

        if not content or not content.strip():
            return []

        try:
            sentences = nltk.sent_tokenize(content)
        except Exception:
            # Fallback if NLTK (or its tokenizer data) is unavailable: split
            # after sentence-ending punctuation so the punctuation is kept.
            sentences = re.split(r'(?<=[.!?])\s+', content)

        chunks = []
        parts = []      # sentences collected for the chunk being built
        length = 0      # length of " ".join(parts)

        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue

            # Length of the chunk if this sentence were appended to it.
            projected = length + len(sentence) + (1 if parts else 0)

            if parts and projected >= chunk_size:
                chunks.append(" ".join(parts))
                parts = [sentence]
                length = len(sentence)
            else:
                parts.append(sentence)
                length = projected

        if parts:
            chunks.append(" ".join(parts))

        return chunks
    
    def generate_fan_out_queries(self, main_query, persona, api_key):
        """Expand the main query into persona-specific fan-out queries.

        When ``api_key`` carries the expected ``sk-ant-api03-`` prefix the
        prompt is sent through ``_call_api`` and its return value is handed
        back as-is. Without such a key, or if the call raises, the canned
        result of ``_demo_fan_out`` is returned instead.
        """
        has_usable_key = bool(api_key) and api_key.startswith('sk-ant-api03-')
        if not has_usable_key:
            return self._demo_fan_out(main_query, persona)

        prompt_text = f"""You are an expert Query Fan-Out generator. Take the main query and persona, then expand into diverse related queries that reflect how this persona would search and what they need to know.

Main Query: {main_query}
Persona: {persona}

Consider how the persona influences:
• Search Language: Technical vs casual vocabulary, industry jargon
• Intent Depth: Surface-level vs deep expertise needs
• Context Requirements: What background knowledge they have/lack
• Decision Factors: What matters most to this persona type
• Use Cases: How they'll apply the information
• Pain Points: Common challenges this persona faces

Generate 8-12 diverse fan-out queries that explore different angles this persona would need.

Respond with a JSON object:
{{
    "fan_out_queries": ["query 1", "query 2", "query 3", ...],
    "query_categories": ["category 1", "category 2", ...],
    "persona_reasoning": "explanation of how persona influenced the queries"
}}

Respond only with valid JSON."""

        try:
            return self._call_api(prompt_text, api_key)
        except Exception:
            # Any API problem degrades to the demo queries.
            return self._demo_fan_out(main_query, persona)
    
    def analyze_content_comprehensive(self, content, main_query, persona, api_key):
        """Analyze content against the main query plus its fan-out queries.

        Args:
            content: The text being evaluated; embedded verbatim in the prompt.
            main_query: The primary question the content should answer.
            persona: Description of the target reader.
            api_key: Anthropic API key. The API is only used when the key
                starts with ``sk-ant-api03-``; otherwise demo data is returned.

        Returns:
            A dict of analysis fields (scores, gaps, recommendations, ...)
            that always includes ``fan_out_queries``, ``persona_reasoning``
            and ``query_categories``. Falls back to
            ``_demo_comprehensive_analysis`` when no usable key is given or
            the API call / response parsing fails.
        """
        # Step 1: Generate fan-out queries
        fan_out_data = self.generate_fan_out_queries(main_query, persona, api_key)

        if isinstance(fan_out_data, str):
            try:
                fan_out_data = json.loads(fan_out_data.strip())
            except ValueError:  # json.JSONDecodeError is a ValueError
                fan_out_data = None

        # Valid JSON that is not an object (e.g. a bare list) is unusable too.
        if not isinstance(fan_out_data, dict):
            fan_out_data = self._demo_fan_out(main_query, persona)

        fan_out_queries = fan_out_data.get('fan_out_queries')
        if not isinstance(fan_out_queries, list):
            fan_out_queries = []

        # Step 2: Without a usable key there is nothing to send; use demo data.
        if not (api_key and api_key.startswith('sk-ant-api03-')):
            return self._demo_comprehensive_analysis(main_query, persona, fan_out_queries)

        # Step 3: Comprehensive analysis with fan-out context
        all_queries = [main_query] + fan_out_queries
        queries_text = "\n".join(f"• {q}" for q in all_queries)

        comprehensive_prompt = f"""Analyze this content through the perspective of: {persona}

Content to analyze:
{content}

Main Query: {main_query}

Fan-Out Queries (comprehensive coverage needed):
{queries_text}

IMPORTANT: Also provide section-by-section topical relevance scores for the content.

Provide a comprehensive professional analysis suitable for a detailed report. Focus on actionable insights and strategic recommendations.

Respond with a JSON object containing:
{{
    "coverage_score": 1-10,
    "intent_fulfillment": 1-10,
    "fan_out_coverage": 1-10,
    "executive_summary": "2-3 sentence overview of the analysis focusing on key findings and overall assessment",
    "query_gaps": ["Detailed descriptions of gaps with explanation of impact"],
    "strongest_sections": ["Detailed explanations of what works well and why it's effective"],
    "weakest_sections": ["Detailed explanations of what's lacking and why it's problematic"],
    "improvements": ["Specific, actionable recommendations with implementation guidance"],
    "missing_elements": ["Detailed descriptions of what's missing with explanations of why it's important"],
    "persona_specific_gaps": ["Gaps specific to this persona's needs with detailed explanations"],
    "strategic_recommendations": ["High-level strategic advice for content improvement"],
    "implementation_priority": ["Ordered list of which improvements to tackle first"],
    "business_impact": "Assessment of how the identified issues affect business outcomes",
    "section_scores": [
        {{
            "snippet": "First 100 characters of the section",
            "score": 1-10,
            "relevance_reason": "Why this section scored this way in relation to the queries"
        }}
    ]
}}

Make all text entries detailed and professional, suitable for executive reporting. Focus on strategic insights and business impact."""

        try:
            raw = self._call_api(comprehensive_prompt, api_key)
            analysis = json.loads(raw.strip()) if isinstance(raw, str) else raw
            if not isinstance(analysis, dict):
                raise ValueError("analysis response is not a JSON object")
        except Exception as exc:
            # Deliberate best effort: report the problem, then degrade to demo data.
            print(f"Warning: API analysis failed, using demo analysis: {exc}")
            return self._demo_comprehensive_analysis(main_query, persona, fan_out_queries)

        # Add fan-out data to results
        analysis['fan_out_queries'] = fan_out_queries
        analysis['persona_reasoning'] = fan_out_data.get('persona_reasoning', '')
        analysis['query_categories'] = fan_out_data.get('query_categories', [])

        return analysis
    
    def generate_professional_report(self, results, content_title, main_query, persona):
        """Render analysis results as a Markdown report.

        Scores are displayed exactly as supplied, but their rating labels are
        computed from a numeric interpretation, so a missing score or a score
        delivered as a string (e.g. ``"7"``) no longer raises ``TypeError``.
        Missing or non-list sections are treated as empty, and non-string
        list entries are converted with ``str()`` before formatting.

        Args:
            results: Analysis dict as produced by
                ``analyze_content_comprehensive``.
            content_title: Title shown in the report heading.
            main_query: The primary query the content was evaluated against.
            persona: Target persona; used lower-cased in running text.

        Returns:
            The complete report as a single Markdown string.
        """

        def numeric(value):
            """Return value as a number, or None when it is not numeric."""
            if isinstance(value, bool):
                return None
            if isinstance(value, (int, float)):
                return value
            if isinstance(value, str):
                try:
                    return float(value.strip())
                except ValueError:
                    return None
            return None

        def items(key):
            """Return results[key] as a list; anything else counts as empty."""
            value = results.get(key)
            return list(value) if isinstance(value, (list, tuple)) else []

        def titled(entry):
            """Split one entry into its (title, description) pair."""
            text = entry if isinstance(entry, str) else str(entry)
            return self._extract_title(text), self._extract_description(text)

        audience = persona.lower()

        # Numeric views are used for thresholds only; 0 selects the lowest label.
        coverage = numeric(results.get('coverage_score')) or 0
        intent = numeric(results.get('intent_fulfillment')) or 0
        fan_out = numeric(results.get('fan_out_coverage')) or 0

        coverage_label = "Excellent" if coverage >= 8 else "Good" if coverage >= 6 else "Needs Improvement"
        intent_label = "Fully addresses" if intent >= 9 else "Mostly addresses" if intent >= 7 else "Partially addresses"
        fan_out_label = "Comprehensive" if fan_out >= 8 else "Adequate" if fan_out >= 6 else "Limited"

        executive_summary = results.get(
            'executive_summary',
            'Comprehensive analysis of content effectiveness and strategic recommendations for improvement.')
        business_impact = results.get(
            'business_impact',
            'This analysis identifies key opportunities for content enhancement and strategic positioning.')
        persona_reasoning = results.get(
            'persona_reasoning',
            'Analysis tailored to the specific needs and decision-making process of the target persona.')

        fan_out_queries = items('fan_out_queries')
        query_categories = items('query_categories')

        # The two trailing spaces before "\n" are Markdown hard line breaks.
        parts = [
            f"# Content Analysis Report: {content_title}\n\n",
            "## Executive Summary\n\n",
            f"{executive_summary}\n\n",
            f"{business_impact}\n\n",
            "## Performance Metrics\n\n",
            f"**Coverage Score:** {results.get('coverage_score', 'N/A')}/10 - {coverage_label} content coverage  \n",
            f"**Intent Fulfillment:** {results.get('intent_fulfillment', 'N/A')}/10 - {intent_label} \"{main_query}\"  \n",
            f"**Fan-Out Coverage:** {results.get('fan_out_coverage', 'N/A')}/10 - {fan_out_label} coverage of related topics\n\n",
            "## Query Fan-Out Analysis\n\n",
            f"**Main Query:** {main_query}\n\n",
            f"**Generated Fan-Out Queries ({len(fan_out_queries)}):**\n",
        ]

        # Fan-out queries, each with its category when one is available
        if fan_out_queries:
            for index, query in enumerate(fan_out_queries):
                category = f" *({query_categories[index]})*" if index < len(query_categories) else ""
                parts.append(f"{index + 1}. {query}{category}\n")
        else:
            parts.append("No fan-out queries generated.\n")

        parts.append(f"\n**Persona Reasoning:** {persona_reasoning}\n")

        # Per-section relevance scores, if the analysis supplied any
        section_scores = [entry for entry in items('section_scores') if isinstance(entry, dict)]
        if section_scores:
            parts.append("\n## Content Section Analysis\n\n**Topical Relevance Scores by Section:**\n\n")
            for number, section in enumerate(section_scores, 1):
                score = section.get('score', 'N/A')
                snippet = str(section.get('snippet') or '')
                if len(snippet) > 100:
                    snippet = snippet[:100] + '...'
                relevance = section.get('relevance_reason', 'No analysis available')

                score_value = numeric(score)
                if score_value is None:
                    score_label = "N/A"
                elif score_value >= 8:
                    score_label = "🟢 High"
                elif score_value >= 6:
                    score_label = "🟡 Medium"
                elif score_value >= 1:
                    score_label = "🔴 Low"
                else:
                    score_label = "N/A"

                parts.append(
                    f"**Section {number}:** {score}/10 {score_label}\n"
                    f"*Content:* \"{snippet}\"\n"
                    f"*Analysis:* {relevance}\n\n"
                )

        parts.append(
            "## Content Strengths\n\n"
            f"The content demonstrates several key strengths that make it valuable for {audience}s:\n\n"
        )
        for strength in items('strongest_sections'):
            title, description = titled(strength)
            parts.append(f"**{title}:** {description}\n\n")

        parts.append(
            "## Critical Weaknesses\n\n"
            "Several significant weaknesses limit the content's practical utility:\n\n"
        )
        for weakness in items('weakest_sections'):
            title, description = titled(weakness)
            parts.append(f"**{title}:** {description}\n\n")

        missing_elements = items('missing_elements')
        parts.append(
            "## Essential Missing Elements\n\n"
            f"The analysis identified {len(missing_elements)} critical gaps that significantly limit "
            f"the content's practical value for {audience}s:\n\n"
        )
        for number, element in enumerate(missing_elements, 1):
            title, description = titled(element)
            parts.append(f"### {number}. {title}\n{description}\n\n")

        parts.append(
            "## Strategic Recommendations\n\n"
            "To transform this content into a comprehensive resource, the following enhancements are recommended:\n\n"
        )
        for improvement in items('improvements'):
            title, description = titled(improvement)
            parts.append(f"**{title}:** {description}\n\n")

        # Implementation priority, if available
        priorities = items('implementation_priority')
        if priorities:
            parts.append(
                "## Implementation Priority\n\n"
                "Based on impact and feasibility, address improvements in this order:\n\n"
            )
            for number, priority in enumerate(priorities, 1):
                parts.append(f"{number}. {priority}\n")
            parts.append("\n")

        if coverage >= 7:
            verdict = ("This content provides a solid foundation but requires strategic enhancement "
                       "to serve as a complete resource for " + audience + "s.")
            transformation = "an educational resource into a comprehensive strategic tool"
        else:
            verdict = "This content needs significant improvement to effectively serve " + audience + "s."
            transformation = "a basic overview into a valuable professional resource"

        parts.append(
            "## Conclusion\n\n"
            f"{verdict} The identified gaps represent substantial opportunities for improved business "
            "outcomes and user engagement. Addressing these areas systematically will transform the "
            f"content from {transformation}.\n\n"
            "---\n"
            f"*Report generated on {datetime.now().strftime('%B %d, %Y')} | "
            f"Analysis Target: {persona} | Query: \"{main_query}\"*"
        )

        return "".join(parts)
    
    def _extract_title(self, text):
        """Return the heading portion of an entry.

        The text before the first ' - ' is used when that separator occurs;
        otherwise the text before the first ': '. With neither separator the
        first four words are returned, followed by '...' if more words exist.
        """
        for separator in (' - ', ': '):
            head, found, _ = text.partition(separator)
            if found:
                return head.strip()

        leading_words = text.split()
        suffix = '...' if len(leading_words) > 4 else ''
        return ' '.join(leading_words[:4]) + suffix
    
    def _extract_description(self, text):
        """Return the body portion of an entry.

        The text after the first ' - ' is used when that separator occurs;
        otherwise the text after the first ': '. With neither separator the
        whole text is returned, stripped of surrounding whitespace.
        """
        for separator in (' - ', ': '):
            _, found, tail = text.partition(separator)
            if found:
                return tail.strip()
        return text.strip()
    
    def _call_api(self, prompt, api_key, timeout=60):
        """Send a single-turn prompt to the Anthropic Messages API.

        Args:
            prompt: User message text.
            api_key: Value sent in the ``x-api-key`` header.
            timeout: Seconds to wait for the HTTP request before giving up.
                Without a timeout ``requests`` may block indefinitely.

        Returns:
            The text of the first content block with surrounding whitespace
            removed. If the text contains a Markdown code fence, only the
            contents of the first fenced block are returned.

        Raises:
            Exception: If the API answers with a non-200 status code.
            requests.RequestException: On connection problems or timeout.
        """
        headers = {
            "Content-Type": "application/json",
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01"
        }

        data = {
            "model": "claude-sonnet-4-20250514",
            "max_tokens": 3000,
            "messages": [{"role": "user", "content": prompt}]
        }

        response = requests.post(
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=data,
            timeout=timeout,
        )

        if response.status_code != 200:
            raise Exception(f"API returned {response.status_code}: {response.text}")

        content = response.json()['content'][0]['text']

        # Clean JSON response: unwrap a fenced block, with or without the
        # "json" language tag, so callers receive bare JSON text.
        if '```json' in content:
            content = content.split('```json')[1].split('```')[0]
        elif '```' in content:
            content = content.split('```', 2)[1]

        return content.strip()
    
    def _demo_fan_out(self, main_query, persona):
        """Build canned fan-out queries for use when the API is unavailable.

        Returns a dict with eight templated queries, their matching category
        labels, and a short reasoning sentence.
        """
        topic = main_query.lower()
        who = persona.lower()

        # Each canned query is kept next to the category that labels it.
        canned = [
            ("Benefits", f"How does {topic} specifically benefit {who}?"),
            ("Costs", f"What are the costs and ROI of {topic} for {who}?"),
            ("Challenges", f"What challenges might {who} face with {topic}?"),
            ("Implementation", f"How to implement {topic} step by step for {who}?"),
            ("Alternatives", f"What alternatives to {topic} should {who} consider?"),
            ("Timeline", f"How long does {topic} typically take for {who}?"),
            ("Resources", f"What tools or resources does {who} need for {topic}?"),
            ("Pitfalls", f"What mistakes should {who} avoid with {topic}?"),
        ]

        reasoning = (
            f"Generated queries covering the full spectrum of what {persona} would need to know "
            f"about {main_query}, from initial research to implementation."
        )

        return {
            "fan_out_queries": [query for _, query in canned],
            "query_categories": [category for category, _ in canned],
            "persona_reasoning": reasoning,
        }
    
    def _demo_comprehensive_analysis(self, main_query, persona, fan_out_queries):
        """Return a canned analysis for use when the API is unavailable.

        Only the executive summary and persona reasoning depend on the
        arguments; ``fan_out_queries`` is passed through unchanged and every
        other field is fixed sample data.
        """

        def section(snippet, score, reason):
            """Assemble one section-score entry."""
            return {"snippet": snippet, "score": score, "relevance_reason": reason}

        topic = main_query.lower()
        audience = persona.lower()

        summary = (
            f"The analyzed content provides a solid foundation for understanding {topic} "
            f"but lacks several critical elements that {audience}s need for successful implementation. "
            "While conceptual coverage is strong, practical execution guidance and business integration "
            "strategies require significant enhancement."
        )

        gaps = [
            "Implementation timeline and resource requirements not adequately covered - This gap prevents proper project planning and budget allocation",
            "Measurement and analytics guidance is insufficient - Without clear metrics, success cannot be properly evaluated or demonstrated to stakeholders",
        ]

        strengths = [
            "Clear foundational explanation - Establishes context well and builds understanding progressively, making complex concepts accessible to the target persona",
            "Strong business case development - Effectively connects concepts to tangible business outcomes and strategic value, helping justify implementation decisions",
        ]

        weaknesses = [
            "Limited practical implementation guidance - Lacks specific steps, timelines, and resource requirements needed for actual execution",
            "Missing measurement framework - No specific KPIs, benchmarks, or success metrics provided for tracking performance and demonstrating ROI",
        ]

        improvements = [
            "Add comprehensive ROI metrics and case studies - Include specific performance data, industry benchmarks, and real-world examples showing quantifiable results",
            "Develop competitive analysis framework - Provide systematic approaches for identifying opportunities and gaps compared to competitors",
            "Include detailed implementation planning - Add timeline expectations, resource requirements, and project management guidance for practical execution",
        ]

        missing = [
            "Content promotion and distribution strategies - The content focuses only on creation but ignores how to effectively promote and distribute across multiple channels",
            "Integration guidance with existing systems - No guidance on how to integrate with current marketing automation, CRM systems, and lead nurturing workflows",
            "Budget considerations and resource allocation - Missing practical business planning elements including cost-benefit analysis and resource requirements by business size",
        ]

        persona_gaps = [
            "No stakeholder buy-in strategies - Missing guidance on how to present concepts and gain executive support for implementation",
            "Limited competitive positioning insights - Lacks competitive analysis and market positioning strategies specific to the persona's competitive landscape",
        ]

        strategy = [
            "Focus on practical implementation over theoretical concepts",
            "Add measurement and ROI frameworks for business justification",
            "Include system integration guidance for seamless adoption",
        ]

        priorities = [
            "Add specific metrics and measurement frameworks first",
            "Develop practical implementation guidelines and timelines",
            "Create competitive analysis and positioning strategies",
            "Include system integration and workflow guidance",
        ]

        sections = [
            section(
                "AI implementation in business requires careful planning and execution. Companies should start by",
                8,
                "Strong opening that directly addresses implementation strategy and provides clear business context for executives",
            ),
            section(
                "Key considerations include data quality, team training, and change management. Organizations need",
                7,
                "Covers important practical elements but lacks specific metrics and timelines that executives need for planning",
            ),
            section(
                "Successful AI adoption typically follows a phased approach: pilot projects, proof of concept,",
                6,
                "Provides useful framework but missing detailed implementation guidance and resource requirements",
            ),
        ]

        return {
            "coverage_score": 7,
            "intent_fulfillment": 8,
            "fan_out_coverage": 6,
            "executive_summary": summary,
            "query_gaps": gaps,
            "strongest_sections": strengths,
            "weakest_sections": weaknesses,
            "improvements": improvements,
            "missing_elements": missing,
            "persona_specific_gaps": persona_gaps,
            "strategic_recommendations": strategy,
            "implementation_priority": priorities,
            "business_impact": "The identified gaps significantly limit practical implementation success and may result in poor adoption, unmeasurable results, and difficulty demonstrating value to stakeholders.",
            "section_scores": sections,
            "fan_out_queries": fan_out_queries,
            "persona_reasoning": f"Analysis tailored to {persona}'s specific decision-making process and information needs",
            "query_categories": ["Implementation", "ROI", "Resources", "Measurement"],
        }

# ============================================================================
# GRADIO INTERFACE
# ============================================================================

# Initialize the analyzer
# Module-level instance used by analyze_content(). Constructing it runs
# ProfessionalContentAnalyzer.__init__, which attempts to load the embedding
# model as soon as this module is imported.
analyzer = ProfessionalContentAnalyzer()

def analyze_content(content, main_query, persona, content_title, api_key):
    """Validate the form inputs, run the analysis and return the report.

    ``None`` is treated like an empty string for every argument, and
    surrounding whitespace is removed from the query, persona, title and API
    key. A key pasted with a leading space would otherwise fail the prefix
    check and silently select demo mode.

    Args:
        content: Text to analyze (required).
        main_query: Primary query the content should answer (required).
        persona: Target persona (required).
        content_title: Optional report title; defaults to
            ``"<main_query> Analysis"`` when blank.
        api_key: Optional Anthropic API key.

    Returns:
        The Markdown report, or a message starting with "❌" describing a
        validation problem or an error raised during analysis.
    """
    content = content or ""
    main_query = (main_query or "").strip()
    persona = (persona or "").strip()
    content_title = (content_title or "").strip()
    api_key = (api_key or "").strip()

    # Validate inputs
    if not content.strip():
        return "❌ Please provide content to analyze."

    if not main_query:
        return "❌ Please provide a main query."

    if not persona:
        return "❌ Please specify a persona."

    if not content_title:
        content_title = f"{main_query} Analysis"

    try:
        results = analyzer.analyze_content_comprehensive(content, main_query, persona, api_key)
        return analyzer.generate_professional_report(results, content_title, main_query, persona)
    except Exception as e:
        # Top-level UI boundary: show the failure in the output panel.
        return f"❌ An error occurred during analysis: {str(e)}"

# Create Gradio interface
# Everything created inside this `with` block becomes part of `demo`; the
# order of the statements below is the order of the components on the page.
with gr.Blocks(
    title="Professional AI Content Analyzer",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1200px !important;
    }
    .output-markdown {
        font-family: 'Georgia', serif;
        line-height: 1.6;
    }
    """
) as demo:
    
    # Page header
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 20px;">
        <h1>🚀 Professional AI Content Analyzer</h1>
        <p style="font-size: 18px; color: #666;">Generate comprehensive content analysis reports with strategic recommendations</p>
    </div>
    """)
    
    # Two-column layout: inputs on the left, report on the right (scale 1:2)
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML("<h3>📝 Input</h3>")
            
            content_input = gr.Textbox(
                label="Content to Analyze",
                placeholder="Paste your content here (articles, blog posts, documentation, etc.)",
                lines=10,
                max_lines=15
            )
            
            main_query = gr.Textbox(
                label="Main Query",
                placeholder="e.g., 'What is a Topic Cluster?', 'How to implement AI?'",
                lines=1
            )
            
            persona = gr.Textbox(
                label="Target Persona",
                placeholder="e.g., 'Marketing Professional', 'Business Executive', 'Software Developer'",
                lines=1
            )
            
            content_title = gr.Textbox(
                label="Content Title (Optional)",
                placeholder="e.g., 'Topic Cluster Guide', 'AI Implementation Strategy'",
                lines=1
            )
            
            # type="password" masks the key as it is typed
            api_key = gr.Textbox(
                label="Anthropic API Key (Optional - for enhanced analysis)",
                placeholder="sk-ant-api03-... (leave empty for demo mode)",
                type="password",
                lines=1
            )
            
            with gr.Row():
                analyze_btn = gr.Button("🔍 Analyze Content", variant="primary", size="lg")
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
        
        with gr.Column(scale=2):
            gr.HTML("<h3>📊 Professional Analysis Report</h3>")
            
            # The "output-markdown" class is targeted by the css passed to gr.Blocks above
            output = gr.Markdown(
                label="Analysis Report",
                elem_classes=["output-markdown"]
            )
    
    # Static feature list and usage tips shown below the form
    gr.HTML("""
    <div style="margin-top: 30px; padding: 20px; background: #f8f9fa; border-radius: 10px;">
        <h3>🌟 Features</h3>
        <ul>
            <li><strong>Query Fan-Out:</strong> Generates 8-12 related queries for comprehensive coverage</li>
            <li><strong>Professional Reports:</strong> Executive-style analysis with strategic recommendations</li>
            <li><strong>Persona-Specific Analysis:</strong> Tailored insights based on your target audience</li>
            <li><strong>Gap Analysis:</strong> Identifies missing elements and improvement opportunities</li>
            <li><strong>Implementation Priority:</strong> Orders recommendations by impact and feasibility</li>
        </ul>
        
        <h4>💡 Tips</h4>
        <ul>
            <li>Provide an Anthropic API key for AI-powered analysis, or use demo mode for sample reports</li>
            <li>Be specific with your persona (e.g., "B2B Marketing Manager" vs "Marketer")</li>
            <li>Longer content (500+ words) provides more detailed analysis</li>
        </ul>
    </div>
    """)
    
    # Event handlers
    # The order of `inputs` must match the parameter order of analyze_content().
    analyze_btn.click(
        fn=analyze_content,
        inputs=[content_input, main_query, persona, content_title, api_key],
        outputs=output
    )
    
    # Resets the five input boxes; `output` is not in the outputs list, so the
    # report panel is left as it is.
    clear_btn.click(
        fn=lambda: ("", "", "", "", ""),
        outputs=[content_input, main_query, persona, content_title, api_key]
    )
    
    # Example to get users started
    # Each example row supplies one value per component listed in `inputs`.
    gr.Examples(
        examples=[
            [
                "AI implementation in business requires careful planning and execution. Companies should start by identifying specific use cases where AI can provide measurable value, such as customer service automation or predictive analytics. Key considerations include data quality, team training, and change management. Organizations need clean, relevant data to train AI models effectively. Staff must be prepared for new workflows and tools. Successful AI adoption typically follows a phased approach: pilot projects, proof of concept, scaling, and optimization.",
                "How should businesses implement AI?",
                "Business Executive",
                "AI Implementation Guide for Executives",
                ""
            ]
        ],
        inputs=[content_input, main_query, persona, content_title, api_key],
        label="📋 Try This Example"
    )

# Start the Gradio server only when this file is executed as a script
if __name__ == "__main__":
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.launch(**launch_options)