# Professional AI Content Analyzer - Gradio App for Hugging Face # Generates comprehensive content analysis reports with professional formatting import gradio as gr import requests import json import nltk from sentence_transformers import SentenceTransformer import numpy as np from sklearn.metrics.pairwise import cosine_similarity from sklearn.feature_extraction.text import TfidfVectorizer from datetime import datetime import os # Download required NLTK data try: nltk.data.find('tokenizers/punkt') except LookupError: nltk.download('punkt', quiet=True) # ============================================================================ # PROFESSIONAL CONTENT ANALYZER CLASS # ============================================================================ class ProfessionalContentAnalyzer: def __init__(self): self.model = None self._load_model() def _load_model(self): """Load the embedding model with error handling""" try: self.model = SentenceTransformer('all-MiniLM-L6-v2') except Exception as e: print(f"Warning: Could not load embedding model: {e}") self.model = None def split_content(self, content, chunk_size=500): """Split content into chunks""" try: sentences = nltk.sent_tokenize(content) except: # Fallback if NLTK fails sentences = content.split('. ') chunks = [] current_chunk = "" for sentence in sentences: if len(current_chunk + sentence) < chunk_size: current_chunk += " " + sentence else: if current_chunk: chunks.append(current_chunk.strip()) current_chunk = sentence if current_chunk: chunks.append(current_chunk.strip()) return chunks def generate_fan_out_queries(self, main_query, persona, api_key): """Generate fan-out queries for comprehensive persona coverage""" fan_out_prompt = f"""You are an expert Query Fan-Out generator. Take the main query and persona, then expand into diverse related queries that reflect how this persona would search and what they need to know. Main Query: {main_query} Persona: {persona} Consider how the persona influences: • Search Language: Technical vs casual vocabulary, industry jargon • Intent Depth: Surface-level vs deep expertise needs • Context Requirements: What background knowledge they have/lack • Decision Factors: What matters most to this persona type • Use Cases: How they'll apply the information • Pain Points: Common challenges this persona faces Generate 8-12 diverse fan-out queries that explore different angles this persona would need. Respond with a JSON object: {{ "fan_out_queries": ["query 1", "query 2", "query 3", ...], "query_categories": ["category 1", "category 2", ...], "persona_reasoning": "explanation of how persona influenced the queries" }} Respond only with valid JSON.""" if api_key and api_key.startswith('sk-ant-api03-'): try: return self._call_api(fan_out_prompt, api_key) except Exception as e: return self._demo_fan_out(main_query, persona) else: return self._demo_fan_out(main_query, persona) def analyze_content_comprehensive(self, content, main_query, persona, api_key): """Enhanced analysis using query fanning for comprehensive coverage""" # Step 1: Generate fan-out queries fan_out_data = self.generate_fan_out_queries(main_query, persona, api_key) if isinstance(fan_out_data, str): try: fan_out_data = json.loads(fan_out_data.strip()) except: fan_out_data = self._demo_fan_out(main_query, persona) fan_out_queries = fan_out_data.get('fan_out_queries', []) # Step 2: Split content chunks = self.split_content(content) # Step 3: Comprehensive analysis with fan-out context all_queries = [main_query] + fan_out_queries queries_text = "\n".join([f"• {q}" for q in all_queries]) comprehensive_prompt = f"""Analyze this content through the perspective of: {persona} Content to analyze: {content} Main Query: {main_query} Fan-Out Queries (comprehensive coverage needed): {queries_text} IMPORTANT: Also provide section-by-section topical relevance scores for the content. Provide a comprehensive professional analysis suitable for a detailed report. Focus on actionable insights and strategic recommendations. Respond with a JSON object containing: {{ "coverage_score": 1-10, "intent_fulfillment": 1-10, "fan_out_coverage": 1-10, "executive_summary": "2-3 sentence overview of the analysis focusing on key findings and overall assessment", "query_gaps": ["Detailed descriptions of gaps with explanation of impact"], "strongest_sections": ["Detailed explanations of what works well and why it's effective"], "weakest_sections": ["Detailed explanations of what's lacking and why it's problematic"], "improvements": ["Specific, actionable recommendations with implementation guidance"], "missing_elements": ["Detailed descriptions of what's missing with explanations of why it's important"], "persona_specific_gaps": ["Gaps specific to this persona's needs with detailed explanations"], "strategic_recommendations": ["High-level strategic advice for content improvement"], "implementation_priority": ["Ordered list of which improvements to tackle first"], "business_impact": "Assessment of how the identified issues affect business outcomes", "section_scores": [ {{ "snippet": "First 100 characters of the section", "score": 1-10, "relevance_reason": "Why this section scored this way in relation to the queries" }} ] }} Make all text entries detailed and professional, suitable for executive reporting. Focus on strategic insights and business impact.""" if api_key and api_key.startswith('sk-ant-api03-'): try: analysis_result = self._call_api(comprehensive_prompt, api_key) if isinstance(analysis_result, str): analysis = json.loads(analysis_result.strip()) else: analysis = analysis_result # Add fan-out data to results analysis['fan_out_queries'] = fan_out_queries analysis['persona_reasoning'] = fan_out_data.get('persona_reasoning', '') analysis['query_categories'] = fan_out_data.get('query_categories', []) return analysis except Exception as e: return self._demo_comprehensive_analysis(main_query, persona, fan_out_queries) else: return self._demo_comprehensive_analysis(main_query, persona, fan_out_queries) def generate_professional_report(self, results, content_title, main_query, persona): """Generate a professional report from analysis results""" report = f"""# Content Analysis Report: {content_title} ## Executive Summary {results.get('executive_summary', 'Comprehensive analysis of content effectiveness and strategic recommendations for improvement.')} {results.get('business_impact', 'This analysis identifies key opportunities for content enhancement and strategic positioning.')} ## Performance Metrics **Coverage Score:** {results.get('coverage_score', 'N/A')}/10 - {"Excellent" if results.get('coverage_score', 0) >= 8 else "Good" if results.get('coverage_score', 0) >= 6 else "Needs Improvement"} content coverage **Intent Fulfillment:** {results.get('intent_fulfillment', 'N/A')}/10 - {"Fully addresses" if results.get('intent_fulfillment', 0) >= 9 else "Mostly addresses" if results.get('intent_fulfillment', 0) >= 7 else "Partially addresses"} "{main_query}" **Fan-Out Coverage:** {results.get('fan_out_coverage', 'N/A')}/10 - {"Comprehensive" if results.get('fan_out_coverage', 0) >= 8 else "Adequate" if results.get('fan_out_coverage', 0) >= 6 else "Limited"} coverage of related topics ## Query Fan-Out Analysis **Main Query:** {main_query} **Generated Fan-Out Queries ({len(results.get('fan_out_queries', []))}):** """ # Add fan-out queries with categories if available fan_out_queries = results.get('fan_out_queries', []) query_categories = results.get('query_categories', []) if fan_out_queries: for i, query in enumerate(fan_out_queries, 1): category = f" *({query_categories[i-1]})*" if i-1 < len(query_categories) else "" report += f"{i}. {query}{category}\n" else: report += "No fan-out queries generated.\n" report += f"\n**Persona Reasoning:** {results.get('persona_reasoning', 'Analysis tailored to the specific needs and decision-making process of the target persona.')}\n" # Add Content Section Scores if available if results.get('section_scores'): report += f""" ## Content Section Analysis **Topical Relevance Scores by Section:** """ section_scores = results.get('section_scores', []) for i, section_score in enumerate(section_scores, 1): score = section_score.get('score', 'N/A') snippet = section_score.get('snippet', '')[:100] + '...' if len(section_score.get('snippet', '')) > 100 else section_score.get('snippet', '') relevance = section_score.get('relevance_reason', 'No analysis available') score_label = "🟢 High" if score >= 8 else "🟡 Medium" if score >= 6 else "🔴 Low" if score >= 1 else "N/A" report += f"""**Section {i}:** {score}/10 {score_label} *Content:* "{snippet}" *Analysis:* {relevance} """ report += f"""## Content Strengths The content demonstrates several key strengths that make it valuable for {persona.lower()}s: """ # Add strongest sections for i, strength in enumerate(results.get('strongest_sections', []), 1): report += f"**{self._extract_title(strength)}:** {self._extract_description(strength)}\n\n" # Corrected syntax for the next multiline string report += """## Critical Weaknesses Several significant weaknesses limit the content's practical utility: """ # Add weakest sections for i, weakness in enumerate(results.get('weakest_sections', []), 1): report += f"**{self._extract_title(weakness)}:** {self._extract_description(weakness)}\n\n" report += f"""## Essential Missing Elements The analysis identified {len(results.get('missing_elements', []))} critical gaps that significantly limit the content's practical value for {persona.lower()}s: """ # Add missing elements with detailed explanations for i, element in enumerate(results.get('missing_elements', []), 1): title = self._extract_title(element) description = self._extract_description(element) report += f"### {i}. {title}\n{description}\n\n" report += """## Strategic Recommendations To transform this content into a comprehensive resource, the following enhancements are recommended: """ # Add improvements for i, improvement in enumerate(results.get('improvements', []), 1): report += f"**{self._extract_title(improvement)}:** {self._extract_description(improvement)}\n\n" # Add implementation priority if available if results.get('implementation_priority'): report += """## Implementation Priority Based on impact and feasibility, address improvements in this order: """ for i, priority in enumerate(results.get('implementation_priority', []), 1): report += f"{i}. {priority}\n" report += "\n" report += f"""## Conclusion {"This content provides a solid foundation but requires strategic enhancement to serve as a complete resource for " + persona.lower() + "s." if results.get('coverage_score', 0) >= 7 else "This content needs significant improvement to effectively serve " + persona.lower() + "s."} The identified gaps represent substantial opportunities for improved business outcomes and user engagement. Addressing these areas systematically will transform the content from {"an educational resource into a comprehensive strategic tool" if results.get('coverage_score', 0) >= 7 else "a basic overview into a valuable professional resource"}. --- *Report generated on {datetime.now().strftime('%B %d, %Y')} | Analysis Target: {persona} | Query: "{main_query}"*""" return report def _extract_title(self, text): """Extract title from text (everything before first dash or colon)""" if ' - ' in text: return text.split(' - ')[0].strip() elif ': ' in text: return text.split(': ')[0].strip() else: words = text.split() return ' '.join(words[:4]) + ('...' if len(words) > 4 else '') def _extract_description(self, text): """Extract description from text (everything after first dash or colon)""" if ' - ' in text: return text.split(' - ', 1)[1].strip() elif ': ' in text: return text.split(': ', 1)[1].strip() else: return text.strip() def _call_api(self, prompt, api_key): """Call Anthropic API""" headers = { "Content-Type": "application/json", "x-api-key": api_key, "anthropic-version": "2023-06-01" } data = { "model": "claude-sonnet-4-20250514", "max_tokens": 3000, "messages": [{"role": "user", "content": prompt}] } response = requests.post("https://api.anthropic.com/v1/messages", headers=headers, json=data) if response.status_code != 200: raise Exception(f"API returned {response.status_code}: {response.text}") content = response.json()['content'][0]['text'] # Clean JSON response if '```json' in content: content = content.split('```json')[1].split('```')[0] return content.strip() def _demo_fan_out(self, main_query, persona): """Demo fan-out queries when API is not available""" return { "fan_out_queries": [ f"How does {main_query.lower()} specifically benefit {persona.lower()}?", f"What are the costs and ROI of {main_query.lower()} for {persona.lower()}?", f"What challenges might {persona.lower()} face with {main_query.lower()}?", f"How to implement {main_query.lower()} step by step for {persona.lower()}?", f"What alternatives to {main_query.lower()} should {persona.lower()} consider?", f"How long does {main_query.lower()} typically take for {persona.lower()}?", f"What tools or resources does {persona.lower()} need for {main_query.lower()}?", f"What mistakes should {persona.lower()} avoid with {main_query.lower()}?" ], "query_categories": ["Benefits", "Costs", "Challenges", "Implementation", "Alternatives", "Timeline", "Resources", "Pitfalls"], "persona_reasoning": f"Generated queries covering the full spectrum of what {persona} would need to know about {main_query}, from initial research to implementation." } def _demo_comprehensive_analysis(self, main_query, persona, fan_out_queries): """Enhanced demo analysis for when API is not available""" return { "coverage_score": 7, "intent_fulfillment": 8, "fan_out_coverage": 6, "executive_summary": f"The analyzed content provides a solid foundation for understanding {main_query.lower()} but lacks several critical elements that {persona.lower()}s need for successful implementation. While conceptual coverage is strong, practical execution guidance and business integration strategies require significant enhancement.", "query_gaps": [ "Implementation timeline and resource requirements not adequately covered - This gap prevents proper project planning and budget allocation", "Measurement and analytics guidance is insufficient - Without clear metrics, success cannot be properly evaluated or demonstrated to stakeholders" ], "strongest_sections": [ "Clear foundational explanation - Establishes context well and builds understanding progressively, making complex concepts accessible to the target persona", "Strong business case development - Effectively connects concepts to tangible business outcomes and strategic value, helping justify implementation decisions" ], "weakest_sections": [ "Limited practical implementation guidance - Lacks specific steps, timelines, and resource requirements needed for actual execution", "Missing measurement framework - No specific KPIs, benchmarks, or success metrics provided for tracking performance and demonstrating ROI" ], "improvements": [ "Add comprehensive ROI metrics and case studies - Include specific performance data, industry benchmarks, and real-world examples showing quantifiable results", "Develop competitive analysis framework - Provide systematic approaches for identifying opportunities and gaps compared to competitors", "Include detailed implementation planning - Add timeline expectations, resource requirements, and project management guidance for practical execution" ], "missing_elements": [ "Content promotion and distribution strategies - The content focuses only on creation but ignores how to effectively promote and distribute across multiple channels", "Integration guidance with existing systems - No guidance on how to integrate with current marketing automation, CRM systems, and lead nurturing workflows", "Budget considerations and resource allocation - Missing practical business planning elements including cost-benefit analysis and resource requirements by business size" ], "persona_specific_gaps": [ "No stakeholder buy-in strategies - Missing guidance on how to present concepts and gain executive support for implementation", "Limited competitive positioning insights - Lacks competitive analysis and market positioning strategies specific to the persona's competitive landscape" ], "strategic_recommendations": [ "Focus on practical implementation over theoretical concepts", "Add measurement and ROI frameworks for business justification", "Include system integration guidance for seamless adoption" ], "implementation_priority": [ "Add specific metrics and measurement frameworks first", "Develop practical implementation guidelines and timelines", "Create competitive analysis and positioning strategies", "Include system integration and workflow guidance" ], "business_impact": "The identified gaps significantly limit practical implementation success and may result in poor adoption, unmeasurable results, and difficulty demonstrating value to stakeholders.", "section_scores": [ { "snippet": "AI implementation in business requires careful planning and execution. Companies should start by", "score": 8, "relevance_reason": "Strong opening that directly addresses implementation strategy and provides clear business context for executives" }, { "snippet": "Key considerations include data quality, team training, and change management. Organizations need", "score": 7, "relevance_reason": "Covers important practical elements but lacks specific metrics and timelines that executives need for planning" }, { "snippet": "Successful AI adoption typically follows a phased approach: pilot projects, proof of concept,", "score": 6, "relevance_reason": "Provides useful framework but missing detailed implementation guidance and resource requirements" } ], "fan_out_queries": fan_out_queries, "persona_reasoning": f"Analysis tailored to {persona}'s specific decision-making process and information needs", "query_categories": ["Implementation", "ROI", "Resources", "Measurement"] } # ============================================================================ # GRADIO INTERFACE # ============================================================================ # Initialize the analyzer analyzer = ProfessionalContentAnalyzer() def analyze_content(content, main_query, persona, content_title, api_key): """Main function called by Gradio interface""" # Validate inputs if not content.strip(): return "❌ Please provide content to analyze." if not main_query.strip(): return "❌ Please provide a main query." if not persona.strip(): return "❌ Please specify a persona." if not content_title.strip(): content_title = f"{main_query} Analysis" try: # Perform analysis results = analyzer.analyze_content_comprehensive(content, main_query, persona, api_key) # Generate report report = analyzer.generate_professional_report(results, content_title, main_query, persona) return report except Exception as e: return f"❌ An error occurred during analysis: {str(e)}" # Create Gradio interface with gr.Blocks( title="Professional AI Content Analyzer", theme=gr.themes.Soft(), css=""" .gradio-container { max-width: 1200px !important; } .output-markdown { font-family: 'Georgia', serif; line-height: 1.6; } """ ) as demo: gr.HTML("""
Generate comprehensive content analysis reports with strategic recommendations