"""
Sentiment Analysis Page

Analyze content performance across all sentiment types with advanced filtering.
Data is fetched on-demand: user sets filters then clicks "Fetch Data".
Global filters (platform/brand/date) from the sidebar are pre-populated.
"""
import streamlit as st
import pandas as pd
import sys
from pathlib import Path

# Make the project root importable before pulling in the project packages below.
parent_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(parent_dir))

from visualizations.sentiment_charts import SentimentCharts
from visualizations.distribution_charts import DistributionCharts
from visualizations.content_cards import ContentCards
from agents.content_summary_agent import ContentSummaryAgent

# NOTE(review): several user-facing string literals in this module contain what
# looks like mis-encoded emoji. They are reproduced unchanged here; confirm the
# intended characters and the file encoding before altering them.


def _render_available_data_summary(dashboard_df, platforms, brands):
    """Show dashboard-wide totals while no platform/brand has been chosen.

    Args:
        dashboard_df: The lightweight dashboard DataFrame; its 'platform' and
            'brand' columns are counted.
        platforms: Sorted platform values to list.
        brands: Sorted brand values to list.
    """
    st.markdown("### πŸ“Š Available Data Summary")
    col_total, col_platforms, col_brands = st.columns(3)
    with col_total:
        st.metric("Total Comments", f"{len(dashboard_df):,}")

    breakdowns = [
        (col_platforms, "Platforms", "View Platforms", 'platform', platforms),
        (col_brands, "Brands", "View Brands", 'brand', brands),
    ]
    for column, metric_label, expander_label, field, values in breakdowns:
        # One value_counts() pass instead of re-scanning the frame per value.
        counts = dashboard_df[field].value_counts()
        with column:
            st.metric(metric_label, len(values))
            with st.expander(expander_label):
                for value in values:
                    st.write(f"- **{value}**: {counts.get(value, 0):,} comments")


def _render_pagination_controls(position, total_pages):
    """Render the Previous / page label / Next row for the content cards.

    Args:
        position: Suffix for the button widget keys ("top" or "bottom") so the
            two rows of controls on the page get distinct keys.
        total_pages: Total number of pages available.
    """
    col_prev, col_info, col_next = st.columns([1, 2, 1])
    with col_prev:
        if st.button("⬅️ Previous", key=f"prev_{position}",
                     disabled=st.session_state.sentiment_page <= 1):
            st.session_state.sentiment_page -= 1
            st.rerun()
    with col_info:
        # NOTE(review): unsafe_allow_html=True suggests this label was wrapped
        # in HTML markup that is no longer present in the literal; only a line
        # break remains on each side. Restore the intended wrapper if known.
        st.markdown(
            "\n"
            f"Page {st.session_state.sentiment_page} / {total_pages}\n",
            unsafe_allow_html=True
        )
    with col_next:
        if st.button("Next ➑️", key=f"next_{position}",
                     disabled=st.session_state.sentiment_page >= total_pages):
            st.session_state.sentiment_page += 1
            st.rerun()


def _render_summary_report(label, key, result):
    """Render one cached AI summary result, or its error with a retry button.

    Args:
        label: Human-readable summary type shown in the report title.
        key: Key of this result in ``st.session_state.content_summaries``;
            also used to build the retry button's widget key.
        result: Dict produced by the summary agent. Reads 'success', and then
            either 'summary' (+ optional 'metadata') or 'error'.
    """
    if not result['success']:
        st.error(f"❌ AI analysis failed: {result.get('error','Unknown error')}")
        if st.button("πŸ”„ Retry", key=f"retry_{key}"):
            # Drop the failed result so the user can request it again.
            del st.session_state.content_summaries[key]
            st.rerun()
        return

    summary = result['summary']
    with st.expander(f"πŸ“Š AI Analysis Report β€” {label}", expanded=True):
        st.markdown("### Executive Summary")
        st.info(summary['executive_summary'])

        if summary['main_themes']:
            st.markdown("### 🎯 Main Themes")
            theme_emojis = {'positive': '😊', 'negative': '😟', 'mixed': 'πŸ€”'}
            for theme in summary['main_themes']:
                emoji = theme_emojis.get(theme.get('sentiment', 'mixed'), 'πŸ€”')
                st.markdown(f"**{emoji} {theme.get('theme')}** ({theme.get('sentiment','mixed').title()})\n- {theme.get('description','')}")

        col_praise, col_complaints = st.columns(2)
        with col_praise:
            st.markdown("### βœ… Praise Points")
            for point in summary.get('praise_points', []):
                st.markdown(f"- {point}")
        with col_complaints:
            st.markdown("### ⚠️ Key Complaints")
            for point in summary.get('key_complaints', []):
                st.markdown(f"- {point}")

        col_faq, col_insights = st.columns(2)
        with col_faq:
            st.markdown("### ❓ FAQs")
            for question in summary.get('frequently_asked_questions', []):
                st.markdown(f"- {question}")
        with col_insights:
            st.markdown("### πŸ’‘ Insights")
            for insight in summary.get('unexpected_insights', []):
                st.markdown(f"- {insight}")

        if summary.get('action_recommendations'):
            st.markdown("### 🎯 Recommended Actions")
            priority_emojis = {'HIGH': 'πŸ”΄', 'MEDIUM': '🟑', 'LOW': '🟒'}
            for action in summary['action_recommendations']:
                priority = action.get('priority', 'medium').upper()
                emoji = priority_emojis.get(priority, '🟑')
                st.markdown(f"{emoji} **[{priority}]** {action.get('action','')}")

    # Sibling of the report expander (Streamlit does not allow nesting them).
    with st.expander("ℹ️ Analysis Metadata"):
        meta = result.get('metadata', {})
        mc1, mc2, mc3 = st.columns(3)
        mc1.metric("Comments Analysed", meta.get('total_comments_analyzed', 0))
        mc2.metric("Model Used", meta.get('model_used', 'N/A'))
        mc3.metric("Tokens Used", meta.get('tokens_used', 0))


def _render_comment_group(title, comments, empty_message):
    """Render a collapsed expander listing *comments* as comment cards.

    Args:
        title: Expander label.
        comments: DataFrame of comments; each row is passed to
            ``ContentCards.display_comment_card``.
        empty_message: Info text shown when *comments* is empty.
    """
    with st.expander(title, expanded=False):
        if comments.empty:
            st.info(empty_message)
            return
        for _, comment in comments.iterrows():
            ContentCards.display_comment_card(comment, show_original=True)


def render_sentiment_analysis(data_loader):
    """
    Render the Sentiment Analysis page.

    The page reads the pre-loaded ``dashboard_df`` and ``global_filters`` from
    ``st.session_state`` to build the filter widgets, runs a query through
    ``data_loader.load_sa_data`` only when "Fetch Data" is clicked, caches the
    result in session state (``sa_contents``, ``sa_comments``, ``sa_fetch_key``)
    and then renders summary metrics, paginated content cards with on-demand AI
    summaries, insights, action items and a CSV export.

    Args:
        data_loader: SentimentDataLoader instance. Must provide
            ``load_sa_data(platform, brand, top_n, min_comments, sort_by,
            sentiments, intents, date_range)`` returning a
            ``(contents_df, comments_df)`` pair.
    """
    st.title("πŸ” Custom Sentiment Queries")
    st.markdown("Analyze content performance based on sentiment patterns and user feedback")
    st.markdown("---")

    sentiment_charts = SentimentCharts()
    distribution_charts = DistributionCharts()
    summary_agent = ContentSummaryAgent(model="gpt-5-nano", temperature=1)

    if 'content_summaries' not in st.session_state:
        st.session_state.content_summaries = {}

    # --- Filter options come from the already-loaded (lightweight) dashboard df
    dashboard_df = st.session_state.get('dashboard_df')
    if dashboard_df is None or dashboard_df.empty:
        st.warning("Dashboard data not loaded yet. Please wait for the app to initialise.")
        return

    available_platforms = sorted(dashboard_df['platform'].dropna().unique().tolist())
    available_brands = sorted(dashboard_df['brand'].dropna().unique().tolist())

    # --- Pre-populate from the global sidebar filters
    global_filters = st.session_state.get('global_filters', {})
    global_platforms = global_filters.get('platforms', [])
    global_brands = global_filters.get('brands', [])
    global_date_range = global_filters.get('date_range')

    # --- Platform & brand selection
    st.markdown("### 🎯 Select Platform and Brand")
    st.info(
        "⚑ **Performance**: Choose a platform and brand, set optional filters, "
        "then click **Fetch Data** to run a targeted Snowflake query."
    )

    filter_col1, filter_col2 = st.columns(2)
    with filter_col1:
        default_platform_idx = 0
        if global_platforms and global_platforms[0] in available_platforms:
            # +1 because the option list starts with a blank entry.
            default_platform_idx = available_platforms.index(global_platforms[0]) + 1
        selected_platform = st.selectbox(
            "Platform *",
            options=[''] + available_platforms,
            index=default_platform_idx,
            help="Select the platform to analyse"
        )
    with filter_col2:
        default_brand_idx = 0
        if global_brands and global_brands[0] in available_brands:
            default_brand_idx = available_brands.index(global_brands[0]) + 1
        selected_brand = st.selectbox(
            "Brand *",
            options=[''] + available_brands,
            index=default_brand_idx,
            help="Select the brand to analyse"
        )

    if not selected_platform or not selected_brand:
        st.warning("⚠️ Please select both **Platform** and **Brand** to continue.")
        st.markdown("---")
        # Quick summary from dashboard data
        _render_available_data_summary(dashboard_df, available_platforms, available_brands)
        return

    st.markdown("---")

    # --- Content filters
    st.markdown("### πŸ” Content Filters")

    # Sentiment / intent options are derived from dashboard_df restricted to the
    # selected platform + brand, so no extra query is needed to build them.
    mask = (dashboard_df['platform'] == selected_platform) & (dashboard_df['brand'] == selected_brand)
    preview_df = dashboard_df[mask]

    filter_col1, filter_col2, filter_col3, filter_col4 = st.columns(4)
    with filter_col1:
        # dropna() keeps sorted() from comparing NaN with strings, matching how
        # the platform and brand options are built above.
        sentiment_options = sorted(preview_df['sentiment_polarity'].dropna().unique().tolist())
        selected_sentiments = st.multiselect(
            "Sentiment",
            options=sentiment_options,
            default=[],
            help="Filter by dominant sentiment. Leave empty for all."
        )
    with filter_col2:
        # 'intent' holds comma-separated values; split them into distinct options.
        # dropna().astype(str) keeps the .str accessor valid for all-NaN columns.
        intent_list = (
            preview_df['intent']
            .dropna().astype(str)
            .str.split(',').explode().str.strip()
            .dropna().unique().tolist()
        )
        selected_intents = st.multiselect(
            "Intent",
            options=sorted(i for i in intent_list if i),
            default=[],
            help="Filter contents that have comments with these intents"
        )
    with filter_col3:
        top_n = st.selectbox(
            "Top N Contents",
            options=[5, 10, 15, 20, 25],
            index=1,
            help="Number of contents to display"
        )
    with filter_col4:
        filter_active = bool(selected_sentiments or selected_intents)
        st.metric(
            "Filters Active",
            "βœ“ Yes" if filter_active else "βœ— No",
            help="Sentiment or intent filters applied" if filter_active else "Showing all sentiments"
        )

    st.markdown("---")

    # --- Advanced ranking controls
    with st.expander("βš™οΈ Advanced Ranking Controls", expanded=False):
        adv_col1, adv_col2 = st.columns(2)
        with adv_col1:
            min_comments = st.slider(
                "Minimum Comments Required",
                min_value=1, max_value=50, value=10, step=1,
                help="Exclude contents with fewer comments than this threshold."
            )
        with adv_col2:
            # Options are (value, label) pairs; only the label is displayed.
            sort_by = st.selectbox(
                "Sort By",
                options=[
                    ('severity_score', '🎯 Severity Score (Balanced) β€” Recommended'),
                    ('sentiment_percentage', 'πŸ“Š Sentiment Percentage'),
                    ('sentiment_count', 'πŸ”’ Sentiment Count (Absolute)'),
                    ('total_comments', 'πŸ’¬ Total Comments (Volume)'),
                ],
                format_func=lambda x: x[1],
                index=0
            )
            sort_by_value = sort_by[0]

        sentiment_label = "selected sentiments" if selected_sentiments else "negative sentiments"
        info_map = {
            'severity_score': f"πŸ“˜ **Severity Score** = Sentiment % Γ— √(Total Comments). Balances {sentiment_label} % with volume.",
            'sentiment_percentage': f"πŸ“˜ Ranks by highest % of {sentiment_label}. May include low-volume contents.",
            'sentiment_count': f"πŸ“˜ Ranks by absolute number of {sentiment_label} comments.",
            'total_comments': "πŸ“˜ Ranks by total comment volume, regardless of sentiment.",
        }
        st.info(info_map.get(sort_by_value, ""))

    # Date range for the query (inherited from the global filters if set)
    if global_date_range and len(global_date_range) == 2:
        query_date_range = global_date_range
    else:
        query_date_range = None

    # --- Fetch button
    # The key captures every query input so cached results are only reused
    # while the settings are unchanged.
    fetch_key = (
        selected_platform, selected_brand, top_n, min_comments, sort_by_value,
        tuple(sorted(selected_sentiments)), tuple(sorted(selected_intents)),
        str(query_date_range)
    )

    fetch_col, info_col = st.columns([1, 3])
    with fetch_col:
        fetch_clicked = st.button("πŸš€ Fetch Data", use_container_width=True, type="primary")

    # "A fetch for these exact settings exists" is tracked separately from
    # "that fetch returned rows", so an empty result can be reported to the
    # user instead of looking as if nothing had been fetched.
    fetch_is_current = (
        'sa_contents' in st.session_state
        and st.session_state.get('sa_fetch_key') == fetch_key
    )
    has_data = fetch_is_current and not st.session_state['sa_contents'].empty

    with info_col:
        if has_data:
            n_contents = len(st.session_state['sa_contents'])
            n_comments = len(st.session_state.get('sa_comments', []))
            st.success(f"βœ… Showing **{n_contents}** contents with **{n_comments:,}** sampled comments")
        elif not (fetch_clicked or fetch_is_current):
            # Otherwise the spinner / "no data" warning below covers the status.
            st.info("πŸ‘† Click **Fetch Data** to run a targeted Snowflake query with the settings above.")

    if fetch_clicked:
        with st.spinner("Fetching data from Snowflake…"):
            contents_df, comments_df = data_loader.load_sa_data(
                platform=selected_platform,
                brand=selected_brand,
                top_n=top_n,
                min_comments=min_comments,
                sort_by=sort_by_value,
                sentiments=selected_sentiments or None,
                intents=selected_intents or None,
                date_range=query_date_range,
            )
        st.session_state['sa_contents'] = contents_df
        st.session_state['sa_comments'] = comments_df
        st.session_state['sa_fetch_key'] = fetch_key
        st.session_state['sa_platform'] = selected_platform
        st.session_state['sa_brand'] = selected_brand
        # Reset pagination on new fetch
        st.session_state['sentiment_page'] = 1
        # Re-run so the page renders from the freshly cached result.
        st.rerun()

    # --- Nothing fetched yet for the current settings
    if not fetch_is_current:
        return

    filtered_contents = st.session_state.get('sa_contents', pd.DataFrame())
    comments_df = st.session_state.get('sa_comments', pd.DataFrame())

    if filtered_contents.empty:
        st.warning("No content data found with the selected filters. Try adjusting and re-fetching.")
        return

    # --- Summary stats
    st.markdown("### πŸ“Š Summary")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Contents Analysed", len(filtered_contents))
    with col2:
        if 'selected_sentiment_percentage' in filtered_contents.columns:
            avg_pct = filtered_contents['selected_sentiment_percentage'].mean()
            label = "Selected Sentiment %" if selected_sentiments else "Avg Negative %"
            st.metric(label, f"{avg_pct:.1f}%")
        else:
            st.metric("Avg Negative %", f"{filtered_contents['negative_percentage'].mean():.1f}%")
    with col3:
        st.metric("Total Comments", int(filtered_contents['total_comments'].sum()))
    with col4:
        st.metric("Total Replies Needed", int(filtered_contents['reply_required_count'].sum()))

    st.markdown("---")

    # --- Engagement scatter
    st.markdown("### πŸ“ˆ Content Engagement Analysis")
    scatter = distribution_charts.create_engagement_scatter(
        filtered_contents, title="Content Engagement vs. Sentiment"
    )
    st.plotly_chart(scatter, use_container_width=True, key="engagement_scatter_chart")

    st.markdown("---")

    # --- Paginated content cards
    st.markdown("### πŸ” Detailed Content Analysis")

    items_per_page = 5
    total_contents = len(filtered_contents)
    total_pages = (total_contents + items_per_page - 1) // items_per_page

    # Initialise the page index and clamp it into range so a stale value can
    # never select an empty slice.
    current_page = st.session_state.get('sentiment_page', 1)
    st.session_state.sentiment_page = min(max(current_page, 1), total_pages)

    if total_contents > items_per_page:
        st.info(f"πŸ“„ Page {st.session_state.sentiment_page} of {total_pages} ({total_contents} total contents)")
        _render_pagination_controls("top", total_pages)
        st.markdown("---")

    start_idx = (st.session_state.sentiment_page - 1) * items_per_page
    end_idx = min(start_idx + items_per_page, total_contents)
    paginated = filtered_contents.iloc[start_idx:end_idx]

    # Comments can only be matched to contents when the join column is present.
    comments_joinable = not comments_df.empty and 'content_sk' in comments_df.columns

    for rank, (_, content_row) in enumerate(paginated.iterrows(), start_idx + 1):
        ContentCards.display_content_card(content_row, rank=rank)
        content_sk = content_row['content_sk']

        # Comments come from the sampled set fetched with the contents, so no
        # additional query is issued per card.
        if comments_joinable:
            content_comments = comments_df[comments_df['content_sk'] == content_sk]
        else:
            content_comments = pd.DataFrame()

        if content_comments.empty:
            st.info("No sampled comment details available for this content.")
        else:
            viz_col1, viz_col2 = st.columns(2)
            with viz_col1:
                pie = sentiment_charts.create_sentiment_pie_chart(
                    content_comments, title="Sentiment Distribution (sample)"
                )
                st.plotly_chart(pie, use_container_width=True,
                                key=f"sentiment_pie_{content_sk}")
            with viz_col2:
                bar = distribution_charts.create_intent_bar_chart(
                    content_comments, title="Intent Distribution (sample)", orientation='h'
                )
                st.plotly_chart(bar, use_container_width=True,
                                key=f"intent_bar_{content_sk}")

        # AI analysis
        st.markdown("#### πŸ€– AI-Powered Analysis")
        st.markdown("**Select analysis type:**")
        btn_col1, btn_col2, btn_col3 = st.columns(3)
        with btn_col1:
            gen_neg = st.button("πŸ“‰ Negative Summary", key=f"ai_negative_{content_sk}",
                                use_container_width=True)
        with btn_col2:
            gen_combined = st.button("πŸ“Š Combined Summary", key=f"ai_combined_{content_sk}",
                                     use_container_width=True)
        with btn_col3:
            gen_pos = st.button("πŸ“ˆ Positive Summary", key=f"ai_positive_{content_sk}",
                                use_container_width=True)

        if gen_neg:
            summary_type = 'negative'
        elif gen_pos:
            summary_type = 'positive'
        elif gen_combined:
            summary_type = 'combined'
        else:
            summary_type = None

        if summary_type:
            with st.spinner(f"Analysing {summary_type} comments with AI…"):
                result = summary_agent.process({
                    'content_sk': content_sk,
                    'content_description': content_row['content_description'],
                    'comments': content_comments,
                    'sentiment_type': summary_type
                })
            # Cache per content + type so the report survives reruns.
            st.session_state.content_summaries[f"{content_sk}_{summary_type}"] = result

        # Show every summary already generated for this content, in a fixed order.
        report_keys = [
            ('Negative', f"{content_sk}_negative"),
            ('Combined', f"{content_sk}_combined"),
            ('Positive', f"{content_sk}_positive"),
        ]
        for label, key in report_keys:
            if key in st.session_state.content_summaries:
                _render_summary_report(label, key, st.session_state.content_summaries[key])

        # Comment expansion (text already loaded by the fetch)
        st.markdown("#### πŸ’¬ View Comments by Sentiment")
        if not content_comments.empty:
            neg_comments = content_comments[
                content_comments['sentiment_polarity'].isin(['negative', 'very_negative'])
            ]
            pos_comments = content_comments[
                content_comments['sentiment_polarity'].isin(['positive', 'very_positive'])
            ]
            col_neg, col_pos = st.columns(2)
            with col_neg:
                _render_comment_group(
                    f"πŸ“‰ Negative Comments ({len(neg_comments)} sampled)",
                    neg_comments,
                    "No negative comments in sample.",
                )
            with col_pos:
                _render_comment_group(
                    f"πŸ“ˆ Positive Comments ({len(pos_comments)} sampled)",
                    pos_comments,
                    "No positive comments in sample.",
                )
        else:
            st.info("No comments available for this content in the current sample.")

        st.markdown("---")

    # --- Bottom pagination
    if total_contents > items_per_page:
        _render_pagination_controls("bottom", total_pages)
        st.markdown("---")

    # --- Insights & recommendations (using sampled comments)
    st.markdown("### πŸ’‘ Insights & Recommendations")

    from utils.data_processor import SentimentDataProcessor
    processor = SentimentDataProcessor()

    # Same join-column guard as the per-card lookup above.
    if comments_joinable:
        all_sampled = comments_df[
            comments_df['content_sk'].isin(filtered_contents['content_sk'])
        ]
    else:
        all_sampled = pd.DataFrame()

    insight_col1, insight_col2 = st.columns(2)
    with insight_col1:
        st.markdown("#### 🎯 Common Intent Patterns")
        if not all_sampled.empty:
            intent_dist = processor.get_intent_distribution(all_sampled)
            top_intents = intent_dist.sort_values('count', ascending=False).head(5)
            for _, row in top_intents.iterrows():
                st.markdown(f"- **{row['intent']}**: {row['count']} ({row['percentage']:.1f}%)")
    with insight_col2:
        st.markdown("#### 🌐 Platform Breakdown")
        if not all_sampled.empty:
            sample_size = len(all_sampled)
            for platform, count in all_sampled['platform'].value_counts().items():
                pct = count / sample_size * 100
                st.markdown(f"- **{str(platform).title()}**: {count} comments ({pct:.1f}%)")

    st.markdown("---")

    # --- Action items
    st.markdown("### βœ… Recommended Actions")
    action_items = []

    total_replies = int(filtered_contents['reply_required_count'].sum())
    if total_replies > 0:
        action_items.append(f"πŸ”΄ **High Priority**: {total_replies} comments require immediate response")

    critical = filtered_contents[filtered_contents['negative_percentage'] > 50]
    if not critical.empty:
        action_items.append(
            f"🚨 **Critical**: {len(critical)} content(s) have >50% negative sentiment β€” investigate root causes"
        )

    if not all_sampled.empty:
        feedback_cnt = all_sampled['intent'].str.contains('feedback_negative', na=False).sum()
        if feedback_cnt:
            action_items.append(f"πŸ’¬ **Feedback**: {feedback_cnt} negative-feedback comments β€” consider product improvements")
        question_cnt = all_sampled['intent'].str.contains('question', na=False).sum()
        if question_cnt:
            action_items.append(f"❓ **Questions**: {question_cnt} questions β€” improve FAQ or support docs")

    if action_items:
        for item in action_items:
            st.markdown(item)
    else:
        st.success("No critical action items at this time.")

    st.markdown("---")

    # --- Export
    st.markdown("### πŸ’Ύ Export Data")
    col1, col2 = st.columns([1, 3])
    with col1:
        base_cols = ['content_sk', 'content_description', 'permalink_url',
                     'total_comments', 'reply_required_count', 'dominant_sentiment']
        optional_cols = ['selected_sentiment_count', 'selected_sentiment_percentage',
                         'negative_count', 'negative_percentage']
        # Only export columns that the fetched frame actually contains.
        export_cols = [c for c in base_cols + optional_cols if c in filtered_contents.columns]
        csv = filtered_contents[export_cols].to_csv(index=False)
        st.download_button(
            label="πŸ“₯ Download as CSV",
            data=csv,
            file_name=f"sentiment_analysis_top{top_n}.csv",
            mime="text/csv"
        )
    with col2:
        st.info("Download the data for further analysis or reporting.")