| | """ |
| | Content Summary Agent |
| | Analyzes and summarizes comments for content pieces |
| | """ |
| | import pandas as pd |
| | from typing import Dict, Any, List |
| | import sys |
| | from pathlib import Path |
| |
|
| | |
| | parent_dir = Path(__file__).resolve().parent.parent |
| | sys.path.append(str(parent_dir)) |
| |
|
| | from agents.base_agent import BaseVisualizationAgent |
| | from utils.llm_helper import LLMHelper |
| |
|
| |
|
class ContentSummaryAgent(BaseVisualizationAgent):
    """
    Agent that analyzes and summarizes comments for a content piece.

    Filters comments by sentiment, downsamples them to a prompt-sized batch,
    and asks the LLM for a structured JSON summary containing themes, praise
    points, complaints, FAQs, unexpected insights, and prioritized action
    recommendations.
    """

    # Polarity labels that make up each coarse sentiment bucket
    # (values observed in the 'sentiment_polarity' column).
    NEGATIVE_POLARITIES = ('negative', 'very_negative')
    POSITIVE_POLARITIES = ('positive', 'very_positive')

    # Maximum number of comments forwarded to the LLM in a single prompt.
    MAX_COMMENTS = 100
    # Maximum characters of each comment shown to the LLM before truncation.
    MAX_COMMENT_CHARS = 300

    def __init__(self, model: str = "gpt-5-nano", temperature: float = 1):
        """
        Initialize Content Summary Agent.

        Args:
            model: LLM model to use
            temperature: Temperature for generation (lower for more focused summaries)
        """
        super().__init__(name="ContentSummaryAgent", model=model, temperature=temperature)
        self.llm_helper = LLMHelper(model=model, temperature=temperature)

    def validate_input(self, input_data: Dict[str, Any]) -> bool:
        """
        Validate input data.

        Args:
            input_data: Input dictionary; must contain 'content_sk',
                'content_description', and 'comments' (list or DataFrame).

        Returns:
            True if valid, False otherwise (failures are logged, not raised).
        """
        required_fields = ('content_sk', 'content_description', 'comments')
        for field in required_fields:
            if field not in input_data:
                self.log_processing(f"Missing required field: {field}", level="error")
                return False

        if not isinstance(input_data['comments'], (list, pd.DataFrame)):
            self.log_processing("Comments must be a list or DataFrame", level="error")
            return False

        return True

    @staticmethod
    def _empty_summary() -> Dict[str, Any]:
        """Return a fresh summary dict with every expected key present but empty."""
        return {
            'executive_summary': '',
            'main_themes': [],
            'praise_points': [],
            'key_complaints': [],
            'frequently_asked_questions': [],
            'unexpected_insights': [],
            'action_recommendations': []
        }

    def _empty_result(self, content_sk: str, sentiment_type: str, message: str) -> Dict[str, Any]:
        """
        Build a successful no-op result (no LLM call) for the no-comments case.

        Args:
            content_sk: Content identifier echoed back to the caller.
            sentiment_type: Sentiment bucket that was requested.
            message: Human-readable explanation placed in the executive summary.

        Returns:
            A result dict with the same shape as a normal process() success.
        """
        summary = self._empty_summary()
        summary['executive_summary'] = message
        return {
            'success': True,
            'content_sk': content_sk,
            'sentiment_type': sentiment_type,
            'summary': summary,
            'metadata': {
                'total_comments_analyzed': 0,
                'model_used': self.model,
                'tokens_used': 0
            }
        }

    def _prepare_comments_context(self, comments: Any, sentiment_type: str = 'negative') -> str:
        """
        Prepare comments data for LLM analysis.

        Filters to the requested sentiment bucket, deterministically samples
        down to at most MAX_COMMENTS rows (balanced halves for 'combined'),
        and renders each comment as a short text entry.

        Args:
            comments: Comments as DataFrame or list of dicts. Rows are expected
                to carry 'sentiment_polarity', 'intent', and a text column
                ('display_text' or 'original_text') — assumed schema; confirm
                against the upstream comment pipeline.
            sentiment_type: 'negative', 'positive', or 'combined'

        Returns:
            Formatted string with one numbered entry per sampled comment.
        """
        if isinstance(comments, list):
            comments_df = pd.DataFrame(comments)
        else:
            comments_df = comments.copy()

        polarity = comments_df['sentiment_polarity']
        if sentiment_type == 'negative':
            comments_df = comments_df[polarity.isin(self.NEGATIVE_POLARITIES)]
        elif sentiment_type == 'positive':
            comments_df = comments_df[polarity.isin(self.POSITIVE_POLARITIES)]

        # Cap the prompt size; random_state keeps sampling reproducible.
        if len(comments_df) > self.MAX_COMMENTS:
            if sentiment_type == 'combined':
                # Keep the sample balanced: up to half negative, half positive.
                half = self.MAX_COMMENTS // 2
                polarity = comments_df['sentiment_polarity']
                negative = comments_df[polarity.isin(self.NEGATIVE_POLARITIES)]
                positive = comments_df[polarity.isin(self.POSITIVE_POLARITIES)]
                comments_df = pd.concat([
                    negative.sample(n=min(half, len(negative)), random_state=42),
                    positive.sample(n=min(half, len(positive)), random_state=42),
                ])
            else:
                comments_df = comments_df.sample(n=self.MAX_COMMENTS, random_state=42)

        comments_text = []
        # Number entries sequentially: after filtering/sampling the DataFrame
        # index is arbitrary and would yield confusing comment numbers.
        for position, (_, row) in enumerate(comments_df.iterrows(), start=1):
            raw_text = row.get('display_text')
            # Series.get returns NaN (not the default) for a present-but-missing
            # value, so fall back to 'original_text' explicitly.
            if raw_text is None or (isinstance(raw_text, float) and pd.isna(raw_text)):
                raw_text = row.get('original_text', '')
            # Coerce to str so slicing never fails on NaN/non-string values.
            text = '' if raw_text is None else str(raw_text)
            sentiment = row.get('sentiment_polarity', 'unknown')
            intent = row.get('intent', 'unknown')

            truncated = text[:self.MAX_COMMENT_CHARS]
            ellipsis = '...' if len(text) > self.MAX_COMMENT_CHARS else ''
            comment_entry = f"""
Comment #{position}:
- Text: {truncated}{ellipsis}
- Sentiment: {sentiment}
- Intent: {intent}
"""
            comments_text.append(comment_entry)

        return "\n".join(comments_text)

    def _generate_summary_prompt(
        self,
        content_description: str,
        comments_context: str,
        total_comments: int,
        sentiment_type: str = 'negative'
    ) -> str:
        """
        Generate the analysis prompt for the LLM.

        Args:
            content_description: Description of the content
            comments_context: Formatted comments (from _prepare_comments_context)
            total_comments: Total number of comments in the requested bucket
            sentiment_type: 'negative', 'positive', or 'combined'

        Returns:
            Prompt string requesting a JSON-structured executive summary.
        """
        if sentiment_type == 'negative':
            focus_instruction = "Focus on understanding negative feedback, complaints, and issues that need attention."
        elif sentiment_type == 'positive':
            focus_instruction = "Focus on understanding what users love, praise points, and successful elements that should be maintained or amplified."
        else:
            focus_instruction = "Provide a balanced analysis covering both positive feedback and areas for improvement."

        prompt = f"""Analyze the {sentiment_type} comments below for the following content and provide a brief executive summary.

**Content:** {content_description}

**Total Comments Analyzed:** {total_comments}

**Analysis Focus:** {focus_instruction}

**Comments to Analyze:**
{comments_context}

**Task:** Provide a concise executive summary in JSON format with the following structure:

{{
    "executive_summary": "2-3 sentence high-level overview focusing on {sentiment_type} sentiment",
    "main_themes": [
        {{
            "theme": "theme name",
            "sentiment": "positive/negative/mixed",
            "description": "brief description"
        }}
    ],
    "praise_points": ["point 1", "point 2", "point 3"],
    "key_complaints": ["complaint 1", "complaint 2", "complaint 3"],
    "frequently_asked_questions": ["question 1", "question 2"],
    "unexpected_insights": ["insight 1", "insight 2"],
    "action_recommendations": [
        {{
            "priority": "high/medium/low",
            "action": "recommended action"
        }}
    ]
}}

**Guidelines:**
- Be concise and actionable
- Focus on the most important insights from {sentiment_type} comments
- Limit each list to top 3-5 items
- If a section has no relevant items, use an empty list
- Executive summary should capture the overall patterns and key takeaways
"""
        return prompt

    def process(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process comments and generate a summary.

        Args:
            input_data: {
                'content_sk': content identifier,
                'content_description': content title/description,
                'comments': DataFrame or list of comment dicts,
                'sentiment_type': 'negative', 'positive', or 'combined'
                    (optional, defaults to 'negative')
            }

        Returns:
            {
                'success': bool,
                'content_sk': str,
                'sentiment_type': str,
                'summary': {
                    'executive_summary': str,
                    'main_themes': list,
                    'praise_points': list,
                    'key_complaints': list,
                    'frequently_asked_questions': list,
                    'unexpected_insights': list,
                    'action_recommendations': list
                },
                'metadata': {
                    'total_comments_analyzed': int,
                    'model_used': str,
                    'tokens_used': int
                }
            }
            On validation failure: {'success': False, 'error': ..., 'content_sk': ...}.
            On LLM/unexpected failure: whatever self.handle_error returns.
        """
        try:
            if not self.validate_input(input_data):
                return {
                    'success': False,
                    'error': 'Invalid input data',
                    'content_sk': input_data.get('content_sk', 'unknown')
                }

            content_sk = input_data['content_sk']
            content_description = input_data['content_description']
            comments = input_data['comments']
            sentiment_type = input_data.get('sentiment_type', 'negative')

            self.log_processing(f"Starting {sentiment_type} analysis for content: {content_sk}")

            if isinstance(comments, list):
                comments_df = pd.DataFrame(comments)
            else:
                comments_df = comments.copy()

            total_comments = len(comments_df)
            if total_comments == 0:
                # Nothing to analyze at all — succeed without calling the LLM.
                return self._empty_result(
                    content_sk, sentiment_type,
                    'No comments available for analysis.'
                )

            comments_context = self._prepare_comments_context(comments_df, sentiment_type)

            # Count only the comments in the requested sentiment bucket.
            if sentiment_type == 'negative':
                filtered_count = int(
                    comments_df['sentiment_polarity'].isin(self.NEGATIVE_POLARITIES).sum()
                )
            elif sentiment_type == 'positive':
                filtered_count = int(
                    comments_df['sentiment_polarity'].isin(self.POSITIVE_POLARITIES).sum()
                )
            else:
                filtered_count = total_comments

            if filtered_count == 0:
                # Comments exist, but none match the requested sentiment bucket.
                return self._empty_result(
                    content_sk, sentiment_type,
                    f'No {sentiment_type} comments available for analysis.'
                )

            prompt = self._generate_summary_prompt(
                content_description,
                comments_context,
                filtered_count,
                sentiment_type
            )

            system_message = """You are an expert social media analyst specializing in
            sentiment analysis and community insights. Provide concise, actionable summaries
            that help content creators understand their audience feedback."""

            self.log_processing(f"Calling LLM for {sentiment_type} summary generation")
            response = self.llm_helper.get_structured_completion(
                prompt=prompt,
                system_message=system_message,
                max_retries=3
            )

            if not response['success']:
                return self.handle_error(
                    Exception(response.get('error', 'LLM call failed')),
                    context=f"content_sk={content_sk}, sentiment_type={sentiment_type}"
                )

            summary = response['content']
            # The LLM is asked for JSON, but guard against a non-dict payload
            # (e.g. a bare string) so the key checks below stay meaningful.
            if not isinstance(summary, dict):
                summary = {'executive_summary': str(summary)}

            # Guarantee every expected key exists, even if the LLM omitted some.
            for key, default in self._empty_summary().items():
                summary.setdefault(key, default)

            self.log_processing(f"Successfully generated {sentiment_type} summary for content: {content_sk}")

            usage = response.get('usage') or {}
            return {
                'success': True,
                'content_sk': content_sk,
                'sentiment_type': sentiment_type,
                'summary': summary,
                'metadata': {
                    'total_comments_analyzed': filtered_count,
                    # Fall back to configured values if the helper response is
                    # partially populated, rather than raising KeyError.
                    'model_used': response.get('model', self.model),
                    'tokens_used': usage.get('total_tokens', 0)
                }
            }

        except Exception as e:
            return self.handle_error(
                e,
                context=f"content_sk={input_data.get('content_sk', 'unknown')}, sentiment_type={input_data.get('sentiment_type', 'negative')}"
            )