# Spaces: MusoraProductDepartment / Sentiment_analysis (Running)
# File size: 26,975 Bytes

"""
Sentiment Analysis Page
Analyze content performance across all sentiment types with advanced filtering.

Data is fetched on-demand: user sets filters then clicks "Fetch Data".
Global filters (platform/brand/date) from the sidebar are pre-populated.
"""
import streamlit as st
import pandas as pd
import sys
from pathlib import Path

parent_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(parent_dir))

from visualizations.sentiment_charts import SentimentCharts
from visualizations.distribution_charts import DistributionCharts
from visualizations.content_cards import ContentCards
from agents.content_summary_agent import ContentSummaryAgent


def _render_available_data_summary(dashboard_df, available_platforms, available_brands):
    """Show total comment counts, broken down per platform and per brand."""
    st.markdown("### 📊 Available Data Summary")

    # Count once per column instead of re-scanning the frame for every value.
    platform_counts = dashboard_df['platform'].value_counts()
    brand_counts = dashboard_df['brand'].value_counts()

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Total Comments", f"{len(dashboard_df):,}")
    with col2:
        st.metric("Platforms", len(available_platforms))
        with st.expander("View Platforms"):
            for p in available_platforms:
                cnt = int(platform_counts.get(p, 0))
                st.write(f"- **{p}**: {cnt:,} comments")
    with col3:
        st.metric("Brands", len(available_brands))
        with st.expander("View Brands"):
            for b in available_brands:
                cnt = int(brand_counts.get(b, 0))
                st.write(f"- **{b}**: {cnt:,} comments")


def _render_pagination_controls(position, total_pages):
    """Render the Previous / page indicator / Next row.

    ``position`` ('top' or 'bottom') is only used to build unique widget keys.
    """
    col_prev, col_info, col_next = st.columns([1, 2, 1])
    with col_prev:
        if st.button("⬅️ Previous", key=f"prev_{position}",
                     disabled=st.session_state.sentiment_page <= 1):
            st.session_state.sentiment_page -= 1
            st.rerun()
    with col_info:
        st.markdown(
            f"<div style='text-align:center;padding-top:8px;'>"
            f"Page {st.session_state.sentiment_page} / {total_pages}</div>",
            unsafe_allow_html=True
        )
    with col_next:
        if st.button("Next ➡️", key=f"next_{position}",
                     disabled=st.session_state.sentiment_page >= total_pages):
            st.session_state.sentiment_page += 1
            st.rerun()


def _render_ai_summary_report(label, key, result):
    """Render one stored AI summary result, or an error with a retry button."""
    if not result['success']:
        st.error(f"❌ AI analysis failed: {result.get('error','Unknown error')}")
        if st.button("🔄 Retry", key=f"retry_{key}"):
            # Drop the failed result so the report disappears on the next run.
            del st.session_state.content_summaries[key]
            st.rerun()
        return

    summary = result['summary']
    with st.expander(f"📊 AI Analysis Report — {label}", expanded=True):
        st.markdown("### Executive Summary")
        st.info(summary['executive_summary'])

        if summary['main_themes']:
            st.markdown("### 🎯 Main Themes")
            theme_emojis = {'positive': '😊', 'negative': '😟', 'mixed': '🤔'}
            for theme in summary['main_themes']:
                sentiment = theme.get('sentiment', 'mixed')
                emoji = theme_emojis.get(sentiment, '🤔')
                st.markdown(
                    f"**{emoji} {theme.get('theme')}** ({sentiment.title()})\n"
                    f"- {theme.get('description','')}"
                )

        col_p, col_c = st.columns(2)
        with col_p:
            st.markdown("### ✅ Praise Points")
            for pt in summary.get('praise_points', []):
                st.markdown(f"- {pt}")
        with col_c:
            st.markdown("### ⚠️ Key Complaints")
            for pt in summary.get('key_complaints', []):
                st.markdown(f"- {pt}")

        col_f, col_i = st.columns(2)
        with col_f:
            st.markdown("### ❓ FAQs")
            for q in summary.get('frequently_asked_questions', []):
                st.markdown(f"- {q}")
        with col_i:
            st.markdown("### 💡 Insights")
            for ins in summary.get('unexpected_insights', []):
                st.markdown(f"- {ins}")

        if summary.get('action_recommendations'):
            st.markdown("### 🎯 Recommended Actions")
            priority_emojis = {'HIGH': '🔴', 'MEDIUM': '🟡', 'LOW': '🟢'}
            for action in summary['action_recommendations']:
                priority = action.get('priority', 'medium').upper()
                emoji = priority_emojis.get(priority, '🟡')
                st.markdown(f"{emoji} **[{priority}]** {action.get('action','')}")

        with st.expander("ℹ️ Analysis Metadata"):
            meta = result.get('metadata', {})
            mc1, mc2, mc3 = st.columns(3)
            mc1.metric("Comments Analysed", meta.get('total_comments_analyzed', 0))
            mc2.metric("Model Used", meta.get('model_used', 'N/A'))
            mc3.metric("Tokens Used", meta.get('tokens_used', 0))


def render_sentiment_analysis(data_loader):
    """
    Render the Sentiment Analysis page.

    The page lets the user pick a platform and brand (pre-populated from the
    global sidebar filters), set optional sentiment / intent / ranking
    filters, and click "Fetch Data". The fetched contents and sampled
    comments are stored in ``st.session_state`` and rendered as summary
    metrics, an engagement scatter, paginated content cards with optional AI
    summaries, insights, action items and a CSV export.

    Session state read: ``dashboard_df``, ``global_filters``.
    Session state written: ``content_summaries``, ``sa_contents``,
    ``sa_comments``, ``sa_fetch_key``, ``sa_platform``, ``sa_brand``,
    ``sentiment_page``.

    Args:
        data_loader: SentimentDataLoader instance; its ``load_sa_data`` method
            is called when the user clicks "Fetch Data" and must return a
            ``(contents_df, comments_df)`` pair.
    """
    st.title("🔍 Custom Sentiment Queries")
    st.markdown("Analyze content performance based on sentiment patterns and user feedback")
    st.markdown("---")

    sentiment_charts = SentimentCharts()
    distribution_charts = DistributionCharts()
    summary_agent = ContentSummaryAgent(model="gpt-5-nano", temperature=1)

    if 'content_summaries' not in st.session_state:
        st.session_state.content_summaries = {}

    # ── Get filter options from the already-loaded (lightweight) dashboard df ─
    dashboard_df = st.session_state.get('dashboard_df')
    if dashboard_df is None or dashboard_df.empty:
        st.warning("Dashboard data not loaded yet. Please wait for the app to initialise.")
        return

    available_platforms = sorted(dashboard_df['platform'].dropna().unique().tolist())
    available_brands    = sorted(dashboard_df['brand'].dropna().unique().tolist())

    # ── Pre-populate from global sidebar filters ───────────────────────────────
    global_filters = st.session_state.get('global_filters', {})
    global_platforms  = global_filters.get('platforms', [])
    global_brands     = global_filters.get('brands', [])
    global_date_range = global_filters.get('date_range')

    # ── Platform & Brand selection ─────────────────────────────────────────────
    st.markdown("### 🎯 Select Platform and Brand")
    st.info(
        "⚡ **Performance**: Choose a platform and brand, set optional filters, "
        "then click **Fetch Data** to run a targeted Snowflake query."
    )

    filter_col1, filter_col2 = st.columns(2)

    with filter_col1:
        default_platform_idx = 0
        if global_platforms and global_platforms[0] in available_platforms:
            default_platform_idx = available_platforms.index(global_platforms[0]) + 1  # +1 for blank
        selected_platform = st.selectbox(
            "Platform *",
            options=[''] + available_platforms,
            index=default_platform_idx,
            help="Select the platform to analyse"
        )

    with filter_col2:
        default_brand_idx = 0
        if global_brands and global_brands[0] in available_brands:
            default_brand_idx = available_brands.index(global_brands[0]) + 1  # +1 for blank
        selected_brand = st.selectbox(
            "Brand *",
            options=[''] + available_brands,
            index=default_brand_idx,
            help="Select the brand to analyse"
        )

    if not selected_platform or not selected_brand:
        st.warning("⚠️ Please select both **Platform** and **Brand** to continue.")
        st.markdown("---")

        # Quick summary from dashboard data
        _render_available_data_summary(dashboard_df, available_platforms, available_brands)
        return

    st.markdown("---")

    # ── Content filters ────────────────────────────────────────────────────────
    st.markdown("### 🔍 Content Filters")

    # Build available sentiment / intent options from dashboard_df filtered to
    # selected platform+brand (fast — no text columns involved)
    mask = (dashboard_df['platform'] == selected_platform) & (dashboard_df['brand'] == selected_brand)
    preview_df = dashboard_df[mask]

    filter_col1, filter_col2, filter_col3, filter_col4 = st.columns(4)

    with filter_col1:
        # dropna() keeps sorted() from comparing NaN with strings, matching
        # how the platform and brand option lists are built above.
        sentiment_options = sorted(preview_df['sentiment_polarity'].dropna().unique().tolist())
        selected_sentiments = st.multiselect(
            "Sentiment",
            options=sentiment_options,
            default=[],
            help="Filter by dominant sentiment. Leave empty for all."
        )

    with filter_col2:
        # 'intent' holds comma-separated labels; split them into single options.
        intent_list = (
            preview_df['intent']
            .str.split(',').explode().str.strip()
            .dropna().unique().tolist()
        )
        selected_intents = st.multiselect(
            "Intent",
            options=sorted(i for i in intent_list if i),
            default=[],
            help="Filter contents that have comments with these intents"
        )

    with filter_col3:
        top_n = st.selectbox(
            "Top N Contents",
            options=[5, 10, 15, 20, 25],
            index=1,
            help="Number of contents to display"
        )

    with filter_col4:
        filter_active = bool(selected_sentiments or selected_intents)
        st.metric(
            "Filters Active",
            "✓ Yes" if filter_active else "✗ No",
            help="Sentiment or intent filters applied" if filter_active else "Showing all sentiments"
        )

    st.markdown("---")

    # ── Advanced ranking controls ──────────────────────────────────────────────
    with st.expander("⚙️ Advanced Ranking Controls", expanded=False):
        adv_col1, adv_col2 = st.columns(2)
        with adv_col1:
            min_comments = st.slider(
                "Minimum Comments Required",
                min_value=1, max_value=50, value=10, step=1,
                help="Exclude contents with fewer comments than this threshold."
            )
        with adv_col2:
            # Options are (value, label) pairs; the label is displayed and the
            # value is passed to the loader.
            sort_by = st.selectbox(
                "Sort By",
                options=[
                    ('severity_score',        '🎯 Severity Score (Balanced) — Recommended'),
                    ('sentiment_percentage',   '📊 Sentiment Percentage'),
                    ('sentiment_count',        '🔢 Sentiment Count (Absolute)'),
                    ('total_comments',         '💬 Total Comments (Volume)'),
                ],
                format_func=lambda x: x[1],
                index=0
            )
            sort_by_value = sort_by[0]

        sentiment_label = "selected sentiments" if selected_sentiments else "negative sentiments"
        info_map = {
            'severity_score':      f"📘 **Severity Score** = Sentiment % × √(Total Comments). Balances {sentiment_label} % with volume.",
            'sentiment_percentage': f"📘 Ranks by highest % of {sentiment_label}. May include low-volume contents.",
            'sentiment_count':      f"📘 Ranks by absolute number of {sentiment_label} comments.",
            'total_comments':       "📘 Ranks by total comment volume, regardless of sentiment.",
        }
        st.info(info_map.get(sort_by_value, ""))

    # Date range for the query (inherit from global filters if set)
    if global_date_range and len(global_date_range) == 2:
        query_date_range = global_date_range
    else:
        query_date_range = None

    # ── Fetch button ───────────────────────────────────────────────────────────
    # The key identifies the exact filter combination a cached result belongs to.
    fetch_key = (
        selected_platform, selected_brand, top_n, min_comments, sort_by_value,
        tuple(sorted(selected_sentiments)), tuple(sorted(selected_intents)),
        str(query_date_range)
    )

    fetch_col, info_col = st.columns([1, 3])
    with fetch_col:
        fetch_clicked = st.button("🚀 Fetch Data", use_container_width=True, type="primary")

    # A cached result is reused only while the filter key is unchanged.
    # "Fetched" and "fetched with rows" are tracked separately so that an
    # empty result still reaches the warning below instead of looking like
    # nothing was fetched at all.
    cached_contents = st.session_state.get('sa_contents')
    has_fetch = (
        cached_contents is not None
        and st.session_state.get('sa_fetch_key') == fetch_key
    )
    has_data = has_fetch and not cached_contents.empty

    with info_col:
        if has_data:
            n_contents = len(cached_contents)
            n_comments = len(st.session_state.get('sa_comments', []))
            st.success(f"✅ Showing **{n_contents}** contents with **{n_comments:,}** sampled comments")
        elif fetch_clicked or has_fetch:
            pass  # spinner / empty-result warning shown below
        else:
            st.info("👆 Click **Fetch Data** to run a targeted Snowflake query with the settings above.")

    if fetch_clicked:
        with st.spinner("Fetching data from Snowflake…"):
            contents_df, comments_df = data_loader.load_sa_data(
                platform=selected_platform,
                brand=selected_brand,
                top_n=top_n,
                min_comments=min_comments,
                sort_by=sort_by_value,
                sentiments=selected_sentiments or None,
                intents=selected_intents or None,
                date_range=query_date_range,
            )
        st.session_state['sa_contents'] = contents_df
        st.session_state['sa_comments'] = comments_df
        st.session_state['sa_fetch_key'] = fetch_key
        st.session_state['sa_platform'] = selected_platform
        st.session_state['sa_brand'] = selected_brand
        # Reset pagination on new fetch
        st.session_state['sentiment_page'] = 1
        st.rerun()

    # ── Nothing fetched yet ────────────────────────────────────────────────────
    if not has_fetch:
        return

    filtered_contents = cached_contents
    comments_df = st.session_state.get('sa_comments')
    if comments_df is None:
        comments_df = pd.DataFrame()

    if filtered_contents.empty:
        st.warning("No content data found with the selected filters. Try adjusting and re-fetching.")
        return

    # ── Summary stats ──────────────────────────────────────────────────────────
    st.markdown("### 📊 Summary")
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric("Contents Analysed", len(filtered_contents))
    with col2:
        if 'selected_sentiment_percentage' in filtered_contents.columns:
            avg_pct = filtered_contents['selected_sentiment_percentage'].mean()
            label = "Selected Sentiment %" if selected_sentiments else "Avg Negative %"
            st.metric(label, f"{avg_pct:.1f}%")
        elif 'negative_percentage' in filtered_contents.columns:
            st.metric("Avg Negative %", f"{filtered_contents['negative_percentage'].mean():.1f}%")
        else:
            # Neither percentage column is present (the export section below
            # also treats both as optional).
            st.metric("Avg Negative %", "N/A")
    with col3:
        st.metric("Total Comments", int(filtered_contents['total_comments'].sum()))
    with col4:
        st.metric("Total Replies Needed", int(filtered_contents['reply_required_count'].sum()))

    st.markdown("---")

    # ── Engagement scatter ─────────────────────────────────────────────────────
    st.markdown("### 📈 Content Engagement Analysis")
    scatter = distribution_charts.create_engagement_scatter(
        filtered_contents, title="Content Engagement vs. Sentiment"
    )
    st.plotly_chart(scatter, use_container_width=True, key="engagement_scatter_chart")

    st.markdown("---")

    # ── Paginated content cards ────────────────────────────────────────────────
    st.markdown("### 🔍 Detailed Content Analysis")

    if 'sentiment_page' not in st.session_state:
        st.session_state.sentiment_page = 1

    items_per_page  = 5
    total_contents  = len(filtered_contents)
    total_pages     = (total_contents + items_per_page - 1) // items_per_page

    # Keep the page inside [1, total_pages] so a stale page number cannot
    # produce an empty slice (total_pages >= 1 because the frame is non-empty).
    st.session_state.sentiment_page = min(max(st.session_state.sentiment_page, 1), total_pages)

    if total_contents > items_per_page:
        st.info(f"📄 Page {st.session_state.sentiment_page} of {total_pages} ({total_contents} total contents)")
        _render_pagination_controls("top", total_pages)
        st.markdown("---")

    start_idx = (st.session_state.sentiment_page - 1) * items_per_page
    end_idx   = min(start_idx + items_per_page, total_contents)
    paginated  = filtered_contents.iloc[start_idx:end_idx]

    # Comments can only be matched to a content when the join column exists.
    comments_joinable = not comments_df.empty and 'content_sk' in comments_df.columns

    for idx, (_, content_row) in enumerate(paginated.iterrows(), start_idx + 1):
        content_sk = content_row['content_sk']
        ContentCards.display_content_card(content_row, rank=idx)

        # Comments from the sampled set (pre-fetched, no extra Snowflake call)
        if comments_joinable:
            content_comments = comments_df[comments_df['content_sk'] == content_sk]
        else:
            content_comments = pd.DataFrame()

        if content_comments.empty:
            st.info("No sampled comment details available for this content.")
        else:
            viz_col1, viz_col2 = st.columns(2)
            with viz_col1:
                pie = sentiment_charts.create_sentiment_pie_chart(
                    content_comments, title="Sentiment Distribution (sample)"
                )
                st.plotly_chart(pie, use_container_width=True,
                                key=f"sentiment_pie_{content_sk}")
            with viz_col2:
                bar = distribution_charts.create_intent_bar_chart(
                    content_comments, title="Intent Distribution (sample)", orientation='h'
                )
                st.plotly_chart(bar, use_container_width=True,
                                key=f"intent_bar_{content_sk}")

        # AI Analysis
        st.markdown("#### 🤖 AI-Powered Analysis")
        st.markdown("**Select analysis type:**")
        btn_col1, btn_col2, btn_col3 = st.columns(3)

        with btn_col1:
            gen_neg = st.button("📉 Negative Summary", key=f"ai_negative_{content_sk}",
                                use_container_width=True)
        with btn_col2:
            gen_combined = st.button("📊 Combined Summary", key=f"ai_combined_{content_sk}",
                                     use_container_width=True)
        with btn_col3:
            gen_pos = st.button("📈 Positive Summary", key=f"ai_positive_{content_sk}",
                                use_container_width=True)

        summary_type = None
        if gen_neg:
            summary_type = 'negative'
        elif gen_pos:
            summary_type = 'positive'
        elif gen_combined:
            summary_type = 'combined'

        # Generate (or regenerate) the requested summary and keep it in
        # session state so it stays visible on later reruns.
        if summary_type:
            summary_key = f"{content_sk}_{summary_type}"
            with st.spinner(f"Analysing {summary_type} comments with AI…"):
                result = summary_agent.process({
                    'content_sk': content_sk,
                    'content_description': content_row['content_description'],
                    'comments': content_comments,
                    'sentiment_type': summary_type
                })
            st.session_state.content_summaries[summary_key] = result

        # Show every summary already stored for this content, in fixed order.
        for label, suffix in (('Negative', 'negative'), ('Combined', 'combined'), ('Positive', 'positive')):
            key = f"{content_sk}_{suffix}"
            if key in st.session_state.content_summaries:
                _render_ai_summary_report(label, key, st.session_state.content_summaries[key])

        # Comment expansion (text already loaded from fetch)
        st.markdown("#### 💬 View Comments by Sentiment")

        if not content_comments.empty:
            neg_comments = content_comments[
                content_comments['sentiment_polarity'].isin(['negative', 'very_negative'])
            ]
            pos_comments = content_comments[
                content_comments['sentiment_polarity'].isin(['positive', 'very_positive'])
            ]

            col_neg, col_pos = st.columns(2)
            with col_neg:
                with st.expander(f"📉 Negative Comments ({len(neg_comments)} sampled)", expanded=False):
                    if not neg_comments.empty:
                        for _, comment in neg_comments.iterrows():
                            ContentCards.display_comment_card(comment, show_original=True)
                    else:
                        st.info("No negative comments in sample.")
            with col_pos:
                with st.expander(f"📈 Positive Comments ({len(pos_comments)} sampled)", expanded=False):
                    if not pos_comments.empty:
                        for _, comment in pos_comments.iterrows():
                            ContentCards.display_comment_card(comment, show_original=True)
                    else:
                        st.info("No positive comments in sample.")
        else:
            st.info("No comments available for this content in the current sample.")

        st.markdown("---")

    # ── Bottom pagination ──────────────────────────────────────────────────────
    if total_contents > items_per_page:
        _render_pagination_controls("bottom", total_pages)

    st.markdown("---")

    # ── Insights & recommendations (using sampled comments) ───────────────────
    st.markdown("### 💡 Insights & Recommendations")

    from utils.data_processor import SentimentDataProcessor
    processor = SentimentDataProcessor()

    # Same guard as the per-content lookup: without the join column there is
    # nothing to aggregate.
    if comments_joinable:
        all_sampled = comments_df[
            comments_df['content_sk'].isin(filtered_contents['content_sk'])
        ]
    else:
        all_sampled = pd.DataFrame()

    insight_col1, insight_col2 = st.columns(2)
    with insight_col1:
        st.markdown("#### 🎯 Common Intent Patterns")
        if not all_sampled.empty:
            intent_dist = processor.get_intent_distribution(all_sampled)
            for _, row in intent_dist.sort_values('count', ascending=False).head(5).iterrows():
                st.markdown(f"- **{row['intent']}**: {row['count']} ({row['percentage']:.1f}%)")

    with insight_col2:
        st.markdown("#### 🌐 Platform Breakdown")
        if not all_sampled.empty:
            for platform, count in all_sampled['platform'].value_counts().items():
                pct = count / len(all_sampled) * 100
                st.markdown(f"- **{platform.title()}**: {count} comments ({pct:.1f}%)")

    st.markdown("---")

    # ── Action items ───────────────────────────────────────────────────────────
    st.markdown("### ✅ Recommended Actions")
    action_items = []

    total_replies = int(filtered_contents['reply_required_count'].sum())
    if total_replies > 0:
        action_items.append(f"🔴 **High Priority**: {total_replies} comments require immediate response")

    # 'negative_percentage' is treated as optional by the export section, so
    # it is guarded here as well instead of raising KeyError.
    if 'negative_percentage' in filtered_contents.columns:
        critical = filtered_contents[filtered_contents['negative_percentage'] > 50]
        if not critical.empty:
            action_items.append(
                f"🚨 **Critical**: {len(critical)} content(s) have >50% negative sentiment — investigate root causes"
            )

    if not all_sampled.empty:
        feedback_cnt = all_sampled['intent'].str.contains('feedback_negative', na=False).sum()
        if feedback_cnt:
            action_items.append(f"💬 **Feedback**: {feedback_cnt} negative-feedback comments — consider product improvements")

        question_cnt = all_sampled['intent'].str.contains('question', na=False).sum()
        if question_cnt:
            action_items.append(f"❓ **Questions**: {question_cnt} questions — improve FAQ or support docs")

    if action_items:
        for item in action_items:
            st.markdown(item)
    else:
        st.success("No critical action items at this time.")

    st.markdown("---")

    # ── Export ─────────────────────────────────────────────────────────────────
    st.markdown("### 💾 Export Data")
    col1, col2 = st.columns([1, 3])
    with col1:
        candidate_cols = [
            'content_sk', 'content_description', 'permalink_url',
            'total_comments', 'reply_required_count', 'dominant_sentiment',
            'selected_sentiment_count', 'selected_sentiment_percentage',
            'negative_count', 'negative_percentage',
        ]
        # Export only the columns the fetched frame actually contains.
        export_cols = [c for c in candidate_cols if c in filtered_contents.columns]
        csv = filtered_contents[export_cols].to_csv(index=False)
        st.download_button(
            label="📥 Download as CSV",
            data=csv,
            file_name=f"sentiment_analysis_top{top_n}.csv",
            mime="text/csv"
        )
    with col2:
        st.info("Download the data for further analysis or reporting.")