""" Content Summary Agent Analyzes and summarizes comments for content pieces """ import pandas as pd from typing import Dict, Any, List import sys from pathlib import Path # Add parent directory to path parent_dir = Path(__file__).resolve().parent.parent sys.path.append(str(parent_dir)) from agents.base_agent import BaseVisualizationAgent from utils.llm_helper import LLMHelper class ContentSummaryAgent(BaseVisualizationAgent): """ Agent that analyzes and summarizes comments for content Extracts themes, praise points, complaints, FAQs, and insights """ def __init__(self, model: str = "gpt-5-nano", temperature: float = 1): """ Initialize Content Summary Agent Args: model: LLM model to use temperature: Temperature for generation (lower for more focused summaries) """ super().__init__(name="ContentSummaryAgent", model=model, temperature=temperature) self.llm_helper = LLMHelper(model=model, temperature=temperature) def validate_input(self, input_data: Dict[str, Any]) -> bool: """ Validate input data Args: input_data: Input dictionary Returns: True if valid, False otherwise """ required_fields = ['content_sk', 'content_description', 'comments'] for field in required_fields: if field not in input_data: self.log_processing(f"Missing required field: {field}", level="error") return False if not isinstance(input_data['comments'], (list, pd.DataFrame)): self.log_processing("Comments must be a list or DataFrame", level="error") return False return True def _prepare_comments_context(self, comments: Any, sentiment_type: str = 'negative') -> str: """ Prepare comments data for LLM analysis Args: comments: Comments as DataFrame or list of dicts sentiment_type: Type of sentiment to analyze ('negative', 'positive', 'combined') Returns: Formatted string with comment data """ # Convert to DataFrame if needed if isinstance(comments, list): comments_df = pd.DataFrame(comments) else: comments_df = comments.copy() # Filter based on sentiment type if sentiment_type == 'negative': # Only negative comments comments_df = comments_df[ comments_df['sentiment_polarity'].isin(['negative', 'very_negative']) ] elif sentiment_type == 'positive': # Only positive comments comments_df = comments_df[ comments_df['sentiment_polarity'].isin(['positive', 'very_positive']) ] # else: combined - use all comments # Limit to reasonable number for API if len(comments_df) > 100: if sentiment_type == 'combined': # For combined: sample from both positive and negative negative_comments = comments_df[ comments_df['sentiment_polarity'].isin(['negative', 'very_negative']) ].sample(n=min(50, len(comments_df[comments_df['sentiment_polarity'].isin(['negative', 'very_negative'])])), random_state=42) positive_comments = comments_df[ comments_df['sentiment_polarity'].isin(['positive', 'very_positive']) ].sample(n=min(50, len(comments_df[comments_df['sentiment_polarity'].isin(['positive', 'very_positive'])])), random_state=42) comments_df = pd.concat([negative_comments, positive_comments]) else: # For single sentiment type: just sample comments_df = comments_df.sample(n=min(100, len(comments_df)), random_state=42) # Format comments for analysis comments_text = [] for idx, row in comments_df.iterrows(): text = row.get('display_text', row.get('original_text', '')) sentiment = row.get('sentiment_polarity', 'unknown') intent = row.get('intent', 'unknown') comment_entry = f""" Comment #{idx + 1}: - Text: {text[:300]}{'...' if len(str(text)) > 300 else ''} - Sentiment: {sentiment} - Intent: {intent} """ comments_text.append(comment_entry) return "\n".join(comments_text) def _generate_summary_prompt( self, content_description: str, comments_context: str, total_comments: int, sentiment_type: str = 'negative' ) -> str: """ Generate prompt for LLM Args: content_description: Description of the content comments_context: Formatted comments total_comments: Total number of comments sentiment_type: Type of sentiment being analyzed ('negative', 'positive', 'combined') Returns: Prompt string """ # Customize prompt based on sentiment type if sentiment_type == 'negative': focus_instruction = "Focus on understanding negative feedback, complaints, and issues that need attention." elif sentiment_type == 'positive': focus_instruction = "Focus on understanding what users love, praise points, and successful elements that should be maintained or amplified." else: # combined focus_instruction = "Provide a balanced analysis covering both positive feedback and areas for improvement." prompt = f"""Analyze the {sentiment_type} comments below for the following content and provide a brief executive summary. **Content:** {content_description} **Total Comments Analyzed:** {total_comments} **Analysis Focus:** {focus_instruction} **Comments to Analyze:** {comments_context} **Task:** Provide a concise executive summary in JSON format with the following structure: {{ "executive_summary": "2-3 sentence high-level overview focusing on {sentiment_type} sentiment", "main_themes": [ {{ "theme": "theme name", "sentiment": "positive/negative/mixed", "description": "brief description" }} ], "praise_points": ["point 1", "point 2", "point 3"], "key_complaints": ["complaint 1", "complaint 2", "complaint 3"], "frequently_asked_questions": ["question 1", "question 2"], "unexpected_insights": ["insight 1", "insight 2"], "action_recommendations": [ {{ "priority": "high/medium/low", "action": "recommended action" }} ] }} **Guidelines:** - Be concise and actionable - Focus on the most important insights from {sentiment_type} comments - Limit each list to top 3-5 items - If a section has no relevant items, use an empty list - Executive summary should capture the overall patterns and key takeaways """ return prompt def process(self, input_data: Dict[str, Any]) -> Dict[str, Any]: """ Process comments and generate summary Args: input_data: { 'content_sk': content identifier, 'content_description': content title/description, 'comments': DataFrame or list of comment dicts, 'sentiment_type': 'negative', 'positive', or 'combined' (optional, defaults to 'negative') } Returns: { 'success': bool, 'content_sk': str, 'sentiment_type': str, 'summary': { 'executive_summary': str, 'main_themes': list, 'praise_points': list, 'key_complaints': list, 'frequently_asked_questions': list, 'unexpected_insights': list, 'action_recommendations': list }, 'metadata': { 'total_comments_analyzed': int, 'model_used': str, 'tokens_used': int } } """ try: # Validate input if not self.validate_input(input_data): return { 'success': False, 'error': 'Invalid input data', 'content_sk': input_data.get('content_sk', 'unknown') } content_sk = input_data['content_sk'] content_description = input_data['content_description'] comments = input_data['comments'] sentiment_type = input_data.get('sentiment_type', 'negative') # Default to negative for backward compatibility self.log_processing(f"Starting {sentiment_type} analysis for content: {content_sk}") # Convert to DataFrame if needed if isinstance(comments, list): comments_df = pd.DataFrame(comments) else: comments_df = comments.copy() total_comments = len(comments_df) if total_comments == 0: return { 'success': True, 'content_sk': content_sk, 'sentiment_type': sentiment_type, 'summary': { 'executive_summary': 'No comments available for analysis.', 'main_themes': [], 'praise_points': [], 'key_complaints': [], 'frequently_asked_questions': [], 'unexpected_insights': [], 'action_recommendations': [] }, 'metadata': { 'total_comments_analyzed': 0, 'model_used': self.model, 'tokens_used': 0 } } # Prepare comments context based on sentiment type comments_context = self._prepare_comments_context(comments_df, sentiment_type) # Get count of comments after filtering if sentiment_type == 'negative': filtered_count = len(comments_df[comments_df['sentiment_polarity'].isin(['negative', 'very_negative'])]) elif sentiment_type == 'positive': filtered_count = len(comments_df[comments_df['sentiment_polarity'].isin(['positive', 'very_positive'])]) else: filtered_count = total_comments if filtered_count == 0: return { 'success': True, 'content_sk': content_sk, 'sentiment_type': sentiment_type, 'summary': { 'executive_summary': f'No {sentiment_type} comments available for analysis.', 'main_themes': [], 'praise_points': [], 'key_complaints': [], 'frequently_asked_questions': [], 'unexpected_insights': [], 'action_recommendations': [] }, 'metadata': { 'total_comments_analyzed': 0, 'model_used': self.model, 'tokens_used': 0 } } # Generate prompt prompt = self._generate_summary_prompt( content_description, comments_context, filtered_count, sentiment_type ) # System message system_message = """You are an expert social media analyst specializing in sentiment analysis and community insights. Provide concise, actionable summaries that help content creators understand their audience feedback.""" # Get LLM response self.log_processing(f"Calling LLM for {sentiment_type} summary generation") response = self.llm_helper.get_structured_completion( prompt=prompt, system_message=system_message, max_retries=3 ) if not response['success']: return self.handle_error( Exception(response.get('error', 'LLM call failed')), context=f"content_sk={content_sk}, sentiment_type={sentiment_type}" ) # Extract summary summary = response['content'] # Ensure all expected fields exist default_summary = { 'executive_summary': '', 'main_themes': [], 'praise_points': [], 'key_complaints': [], 'frequently_asked_questions': [], 'unexpected_insights': [], 'action_recommendations': [] } # Merge with defaults for key in default_summary: if key not in summary: summary[key] = default_summary[key] self.log_processing(f"Successfully generated {sentiment_type} summary for content: {content_sk}") return { 'success': True, 'content_sk': content_sk, 'sentiment_type': sentiment_type, 'summary': summary, 'metadata': { 'total_comments_analyzed': filtered_count, 'model_used': response['model'], 'tokens_used': response['usage']['total_tokens'] } } except Exception as e: return self.handle_error( e, context=f"content_sk={input_data.get('content_sk', 'unknown')}, sentiment_type={input_data.get('sentiment_type', 'negative')}" )