# Spaces: MusoraProductDepartment / Sentiment_analysis
# Status: Sleeping
# File size: 25,335 Bytes
"""
Main Dashboard Page
Displays overall sentiment distributions by brand and platform
"""
import streamlit as st
import sys
from pathlib import Path

# Add parent directory to path
parent_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(parent_dir))

from utils.data_processor import SentimentDataProcessor
from utils.metrics import SentimentMetrics
from utils.pdf_exporter import DashboardPDFExporter
from visualizations.sentiment_charts import SentimentCharts
from visualizations.distribution_charts import DistributionCharts
from visualizations.demographic_charts import DemographicCharts
from visualizations.content_cards import ContentCards


def render_dashboard(df):
    """
    Render the main dashboard page.

    Sections are emitted top to bottom in this order: PDF export, summary
    stats, health indicator, overall sentiment, sentiment by brand and by
    platform, intent, brand/platform cross analysis, volume, reply
    requirements, demographics (Musora App rows only), language
    distribution, temporal trends and the hierarchical sunburst.

    Args:
        df: Sentiment dataframe (used through the pandas DataFrame API:
            ``columns``, ``empty``, boolean-mask indexing). Optional
            sections are skipped when the columns they read are missing.
    """
    st.title("📊 Sentiment Analysis Dashboard")

    _render_pdf_export(df)

    st.markdown("---")

    # Performance tip
    if len(df) > 10000:
        st.info(
            f"💡 **Performance Tip**: Loaded {len(df):,} comments. "
            "Use the global filters in the sidebar to narrow down your "
            "analysis for faster performance."
        )

    st.markdown("---")

    # Initialize components
    sentiment_charts = SentimentCharts()
    distribution_charts = DistributionCharts()
    processor = SentimentDataProcessor()

    # Display overall summary statistics
    ContentCards.display_summary_stats(df)

    st.markdown("---")

    _render_overall_sentiment(df, sentiment_charts)

    st.markdown("---")

    _render_sentiment_by_dimension(
        df, sentiment_charts,
        group_by='brand', label="Brand", icon="🏢",
        metrics_fn=SentimentMetrics.calculate_brand_metrics,
    )

    st.markdown("---")

    _render_sentiment_by_dimension(
        df, sentiment_charts,
        group_by='platform', label="Platform", icon="🌐",
        metrics_fn=SentimentMetrics.calculate_platform_metrics,
    )

    st.markdown("---")

    _render_intent_analysis(df, distribution_charts)

    st.markdown("---")

    _render_cross_dimensional(df, sentiment_charts, distribution_charts)

    st.markdown("---")

    _render_volume_analysis(df, distribution_charts)

    st.markdown("---")

    _render_reply_requirements(df, distribution_charts)

    # A single divider here; the original emitted two back-to-back rules.
    st.markdown("---")

    # Demographics Analysis (for musora_app only)
    _render_demographics(df, processor)

    # Language Distribution (if available)
    if 'detected_language' in df.columns:
        st.markdown("## 🌍 Language Distribution")

        lang_dist = distribution_charts.create_language_distribution(
            df, top_n=10, title="Top 10 Languages"
        )
        st.plotly_chart(lang_dist, use_container_width=True)

    st.markdown("---")

    _render_temporal_trends(df, sentiment_charts)

    # Hierarchical sunburst
    with st.expander("🌟 Hierarchical View", expanded=False):
        st.markdown("**Interactive Brand > Platform > Sentiment Distribution**")
        sunburst = distribution_charts.create_combined_distribution_sunburst(
            df, title="Brand > Platform > Sentiment Distribution"
        )
        st.plotly_chart(sunburst, use_container_width=True)


def _describe_active_filters(global_filters):
    """Build the human-readable filter description passed to the PDF exporter.

    Only filters with a truthy value are included. Returns a dict mapping a
    display label to the filter value (the date range is formatted as
    ``"<start> to <end>"``).
    """
    filter_info = {}
    for key, label in (
        ("platforms", "Platforms"),
        ("brands", "Brands"),
        ("sentiments", "Sentiments"),
    ):
        if global_filters.get(key):
            filter_info[label] = global_filters[key]

    date_range = global_filters.get("date_range")
    if date_range:
        filter_info["Date Range"] = f"{date_range[0]} to {date_range[1]}"
    return filter_info


def _render_pdf_export(df):
    """Render the "Export PDF Report" expander and handle report generation."""
    # Function-scope import keeps this block self-contained.
    from datetime import datetime

    with st.expander("📄 Export PDF Report", expanded=False):
        st.markdown(
            "Generate a comprehensive PDF report of the current dashboard view. "
            "The report includes all charts, metrics, and a data summary. "
            "Active global filters are reflected in the report."
        )
        if not st.button("Generate PDF Report", type="primary", use_container_width=True):
            return

        with st.spinner("Generating PDF report — this may take 30–60 seconds…"):
            # Broad catch is deliberate: this is the UI boundary, and any
            # failure is surfaced to the user instead of breaking the page.
            try:
                # `or {}` also covers the key being present but set to None.
                global_filters = st.session_state.get("global_filters") or {}
                filter_info = _describe_active_filters(global_filters)

                exporter = DashboardPDFExporter()
                pdf_bytes = exporter.generate_report(df, filter_info or None)

                timestamp = datetime.now().strftime('%Y%m%d_%H%M')
                filename = f"musora_sentiment_report_{timestamp}.pdf"

                st.success("Report generated successfully!")
                st.download_button(
                    label="Download PDF Report",
                    data=pdf_bytes,
                    file_name=filename,
                    mime="application/pdf",
                    use_container_width=True,
                )
            except Exception as e:
                st.error(f"Failed to generate report: {e}")
                st.exception(e)


def _render_overall_sentiment(df, sentiment_charts):
    """Render the health indicator and the overall sentiment pie/gauge row."""
    # Calculate overall metrics
    overall_metrics = SentimentMetrics.calculate_overall_metrics(df)

    # Health indicator sits in the wide middle column; the outer two are spacers.
    _, center_col, _ = st.columns([1, 2, 1])
    with center_col:
        ContentCards.display_health_indicator(overall_metrics['negative_pct'])

    st.markdown("---")

    st.markdown("## 🎯 Overall Sentiment Distribution")

    pie_col, gauge_col = st.columns(2)

    with pie_col:
        sentiment_pie = sentiment_charts.create_sentiment_pie_chart(
            df, title="Overall Sentiment Distribution"
        )
        st.plotly_chart(sentiment_pie, use_container_width=True)

    with gauge_col:
        sentiment_gauge = sentiment_charts.create_sentiment_score_gauge(
            overall_metrics['avg_sentiment_score'],
            title="Overall Sentiment Score"
        )
        st.plotly_chart(sentiment_gauge, use_container_width=True)

        # Additional metrics
        positive_col, reply_col = st.columns(2)
        with positive_col:
            st.metric("Positive %", f"{overall_metrics['positive_pct']:.1f}%")
        with reply_col:
            st.metric("Reply Rate %", f"{overall_metrics['reply_required_pct']:.1f}%")


def _render_sentiment_by_dimension(df, sentiment_charts, group_by, label, icon, metrics_fn):
    """Render the count/percentage bar charts and metrics table for one dimension.

    Args:
        df: Sentiment dataframe.
        sentiment_charts: Chart factory providing the two bar-chart builders.
        group_by: Column name passed to the chart builders ('brand' or 'platform').
        label: Display name of the dimension used in headings and the table.
        icon: Emoji shown in the section heading.
        metrics_fn: Callable taking ``df`` and returning a mapping of
            group name -> metrics dict (keys read below).
    """
    st.markdown(f"## {icon} Sentiment Analysis by {label}")

    count_col, pct_col = st.columns(2)

    with count_col:
        # Stacked bar chart
        count_chart = sentiment_charts.create_sentiment_bar_chart(
            df, group_by=group_by, title=f"Sentiment Distribution by {label}"
        )
        st.plotly_chart(count_chart, use_container_width=True)

    with pct_col:
        # Percentage bar chart
        pct_chart = sentiment_charts.create_sentiment_percentage_bar_chart(
            df, group_by=group_by, title=f"Sentiment Distribution by {label} (%)"
        )
        st.plotly_chart(pct_chart, use_container_width=True)

    # Metrics table
    with st.expander(f"📈 Detailed {label} Metrics"):
        rows = [
            {
                label: group_name.title(),
                'Total Comments': metrics['total_comments'],
                'Replies Needed': metrics['total_reply_required'],
                'Negative %': f"{metrics['negative_pct']:.1f}%",
                'Positive %': f"{metrics['positive_pct']:.1f}%",
                'Avg Sentiment Score': f"{metrics['avg_sentiment_score']:.2f}",
            }
            for group_name, metrics in metrics_fn(df).items()
        ]
        st.table(rows)


def _render_intent_analysis(df, distribution_charts):
    """Render the intent bar and pie charts side by side."""
    st.markdown("## 🎭 Intent Analysis")

    bar_col, pie_col = st.columns(2)

    with bar_col:
        intent_bar = distribution_charts.create_intent_bar_chart(
            df, title="Intent Distribution", orientation='h'
        )
        st.plotly_chart(intent_bar, use_container_width=True)

    with pie_col:
        intent_pie = distribution_charts.create_intent_pie_chart(
            df, title="Intent Distribution"
        )
        st.plotly_chart(intent_pie, use_container_width=True)


def _render_cross_dimensional(df, sentiment_charts, distribution_charts):
    """Render the brand/platform comment matrix and the sentiment heatmap."""
    st.markdown("## 🔀 Cross-Dimensional Analysis")

    matrix_col, heatmap_col = st.columns(2)

    with matrix_col:
        # Heatmap showing comment distribution
        brand_platform_matrix = distribution_charts.create_brand_platform_matrix(
            df, title="Brand-Platform Comment Matrix"
        )
        st.plotly_chart(brand_platform_matrix, use_container_width=True)

    with heatmap_col:
        # Sentiment heatmap
        sentiment_heatmap = sentiment_charts.create_sentiment_heatmap(
            df, row_dimension='brand', col_dimension='platform',
            title="Negative Sentiment Heatmap"
        )
        st.plotly_chart(sentiment_heatmap, use_container_width=True)


def _render_volume_analysis(df, distribution_charts):
    """Render comment-volume charts by platform and by brand."""
    st.markdown("## 📊 Volume Analysis")

    platform_col, brand_col = st.columns(2)

    with platform_col:
        platform_dist = distribution_charts.create_platform_distribution(
            df, title="Comments by Platform"
        )
        st.plotly_chart(platform_dist, use_container_width=True)

    with brand_col:
        brand_dist = distribution_charts.create_brand_distribution(
            df, title="Comments by Brand"
        )
        st.plotly_chart(brand_dist, use_container_width=True)


def _render_reply_requirements(df, distribution_charts):
    """Render reply-required charts and the response-urgency metric row."""
    st.markdown("## ⚠️ Reply Requirements Analysis")

    dimensions = (('brand', "Brand"), ('platform', "Platform"))
    for column, (group_by, label) in zip(st.columns(2), dimensions):
        with column:
            reply_chart = distribution_charts.create_reply_required_chart(
                df, group_by=group_by, title=f"Comments Requiring Reply by {label}"
            )
            st.plotly_chart(reply_chart, use_container_width=True)

    # Response urgency metrics
    urgency_metrics = SentimentMetrics.calculate_response_urgency(df)

    st.markdown("### 🚨 Response Urgency Breakdown")
    urgency_levels = (
        ("🔴 Urgent", 'urgent_count', "Negative sentiment + requires reply"),
        ("🟠 High Priority", 'high_priority_count', "Neutral with feedback/request"),
        ("🟡 Medium Priority", 'medium_priority_count', "Positive requiring reply"),
        ("🟢 Low Priority", 'low_priority_count', "Very positive requiring reply"),
    )
    for column, (label, key, help_text) in zip(st.columns(4), urgency_levels):
        with column:
            st.metric(label, urgency_metrics[key], help=help_text)


def _write_top_row(dist_df, label, name_col):
    """Write an insight line for the first row of a distribution frame.

    Reads ``name_col``, ``count`` and ``percentage`` from row 0 of ``dist_df``;
    the caller must ensure the frame is not empty.
    """
    top = dist_df.iloc[0]
    st.write(
        f"**{label}:** {top[name_col]} "
        f"({top['count']:,} comments, {top['percentage']:.1f}%)"
    )


def _write_highest_negative(sentiment_df, group_col):
    """Write the group with the largest summed negative/very_negative percentage.

    Writes nothing when ``sentiment_df`` is empty or has no negative rows.
    """
    if sentiment_df.empty:
        return

    negative_rows = sentiment_df[
        sentiment_df['sentiment_polarity'].isin(['negative', 'very_negative'])
    ]
    totals = negative_rows.groupby(group_col)['percentage'].sum().reset_index()
    if len(totals) == 0:
        return

    worst = totals.sort_values('percentage', ascending=False).iloc[0]
    st.write(
        f"**Highest Negative Sentiment:** {worst[group_col]} "
        f"({worst['percentage']:.1f}% negative)"
    )


def _render_demographics(df, processor):
    """Render the Musora App demographics section when the data supports it.

    The section is skipped unless ``df`` has every column it reads, contains
    at least one ``musora_app`` row, and at least one of the grouped
    demographic columns holds a value other than ``'Unknown'``.
    """
    # Include the derived columns read directly below: checking only the raw
    # 'timezone'/'experience_level' columns would let a KeyError through.
    required_columns = (
        'platform',
        'age_group',
        'timezone',
        'experience_level',
        'timezone_region',
        'experience_group',
    )
    if any(column not in df.columns for column in required_columns):
        return

    # Filter for musora_app data only
    df_musora = df[df['platform'] == 'musora_app'].copy()
    if len(df_musora) == 0:
        return

    # Check if we have any demographic data (not all Unknown)
    has_valid_demographics = any(
        (df_musora[column] != 'Unknown').any()
        for column in ('age_group', 'timezone_region', 'experience_group')
    )
    if not has_valid_demographics:
        return

    st.markdown("## 👥 Demographics Analysis (Musora App)")
    st.info(f"📊 Analyzing demographics for **{len(df_musora):,}** Musora App comments")

    demographic_charts = DemographicCharts()
    demo_summary = processor.get_demographics_summary(df_musora)

    # Display summary metrics
    coverage_col, age_col, region_col, experience_col = st.columns(4)

    with coverage_col:
        st.metric(
            "Comments with Demographics",
            f"{demo_summary['users_with_demographics']:,}",
            f"{demo_summary['coverage_percentage']:.1f}% coverage"
        )

    with age_col:
        avg_age = demo_summary['avg_age']
        st.metric(
            "Average Age",
            f"{avg_age:.1f} years" if avg_age is not None else "N/A"
        )

    with region_col:
        st.metric("Most Common Region", demo_summary['most_common_region'])

    with experience_col:
        avg_experience = demo_summary['avg_experience']
        st.metric(
            "Avg Experience",
            f"{avg_experience:.1f}/10" if avg_experience is not None else "N/A"
        )

    st.markdown("---")

    _render_age_section(df_musora, processor, demographic_charts)

    st.markdown("---")

    _render_geographic_section(df_musora, processor, demographic_charts)

    st.markdown("---")

    _render_experience_section(df_musora, processor, demographic_charts, demo_summary)

    st.markdown("---")


def _render_age_section(df_musora, processor, demographic_charts):
    """Render the age distribution charts and insights."""
    st.markdown("### 🎂 Age Distribution")

    age_dist = processor.get_demographics_distribution(df_musora, 'age_group')
    age_sentiment = processor.get_demographics_by_sentiment(df_musora, 'age_group')

    if age_dist.empty:
        st.info("No age data available for visualization")
        return

    dist_col, sentiment_col = st.columns(2)

    with dist_col:
        age_chart = demographic_charts.create_age_distribution_chart(
            age_dist,
            title="Comments by Age Group"
        )
        st.plotly_chart(age_chart, use_container_width=True)

    with sentiment_col:
        age_sent_chart = demographic_charts.create_age_sentiment_chart(
            age_sentiment,
            title="Sentiment Distribution by Age Group"
        )
        st.plotly_chart(age_sent_chart, use_container_width=True)

    # Insights
    with st.expander("💡 Age Insights"):
        _write_top_row(age_dist, "Most Active Age Group", 'age_group')
        _write_highest_negative(age_sentiment, 'age_group')


def _render_geographic_section(df_musora, processor, demographic_charts):
    """Render timezone and region charts and insights."""
    st.markdown("### 🌍 Geographic Distribution")

    # Get timezone data
    top_timezones = processor.get_top_timezones(df_musora, top_n=15)
    region_dist = processor.get_timezone_regions_distribution(df_musora)
    region_sentiment = processor.get_demographics_by_sentiment(df_musora, 'timezone_region')

    if top_timezones.empty and region_dist.empty:
        st.info("No timezone/region data available for visualization")
        return

    # Top timezones
    if not top_timezones.empty:
        st.markdown("#### Top 15 Timezones")
        timezone_chart = demographic_charts.create_timezone_chart(
            top_timezones,
            title="Most Common Timezones",
            top_n=15
        )
        st.plotly_chart(timezone_chart, use_container_width=True)

    # Regional distribution
    if not region_dist.empty:
        st.markdown("#### Regional Distribution")
        dist_col, sentiment_col = st.columns(2)

        with dist_col:
            region_chart = demographic_charts.create_region_distribution_chart(
                region_dist,
                title="Comments by Region"
            )
            st.plotly_chart(region_chart, use_container_width=True)

        with sentiment_col:
            if not region_sentiment.empty:
                region_sent_chart = demographic_charts.create_region_sentiment_chart(
                    region_sentiment,
                    title="Sentiment Distribution by Region"
                )
                st.plotly_chart(region_sent_chart, use_container_width=True)

    # Insights
    with st.expander("💡 Geographic Insights"):
        if not top_timezones.empty:
            _write_top_row(top_timezones, "Most Common Timezone", 'timezone')

        if not region_dist.empty:
            _write_top_row(region_dist, "Most Active Region", 'timezone_region')
            _write_highest_negative(region_sentiment, 'timezone_region')


def _render_experience_section(df_musora, processor, demographic_charts, demo_summary):
    """Render detailed/grouped experience-level charts and insights."""
    st.markdown("### 🎯 Experience Level Distribution")

    # Get both detailed and grouped experience data
    exp_dist_detailed = processor.get_experience_level_distribution(df_musora, use_groups=False)
    exp_dist_grouped = processor.get_experience_level_distribution(df_musora, use_groups=True)
    exp_sentiment_grouped = processor.get_demographics_by_sentiment(df_musora, 'experience_group')

    if exp_dist_detailed.empty and exp_dist_grouped.empty:
        st.info("No experience level data available for visualization")
        return

    # Tabs for detailed vs grouped view
    detailed_tab, grouped_tab = st.tabs(
        ["📊 Detailed (0-10)", "📊 Grouped (Beginner/Intermediate/Advanced)"]
    )

    with detailed_tab:
        if not exp_dist_detailed.empty:
            exp_chart_detailed = demographic_charts.create_experience_distribution_chart(
                exp_dist_detailed,
                title="Comments by Experience Level (0-10 Scale)",
                use_groups=False
            )
            st.plotly_chart(exp_chart_detailed, use_container_width=True)
        else:
            st.info("No detailed experience level data available")

    with grouped_tab:
        if not exp_dist_grouped.empty:
            dist_col, sentiment_col = st.columns(2)

            with dist_col:
                exp_chart_grouped = demographic_charts.create_experience_distribution_chart(
                    exp_dist_grouped,
                    title="Comments by Experience Group",
                    use_groups=True
                )
                st.plotly_chart(exp_chart_grouped, use_container_width=True)

            with sentiment_col:
                if not exp_sentiment_grouped.empty:
                    exp_sent_chart = demographic_charts.create_experience_sentiment_chart(
                        exp_sentiment_grouped,
                        title="Sentiment by Experience Group",
                        use_groups=True
                    )
                    st.plotly_chart(exp_sent_chart, use_container_width=True)
        else:
            st.info("No grouped experience level data available")

    # Insights
    with st.expander("💡 Experience Insights"):
        if not exp_dist_grouped.empty:
            _write_top_row(exp_dist_grouped, "Most Active Group", 'experience_group')
            _write_highest_negative(exp_sentiment_grouped, 'experience_group')

        if demo_summary['avg_experience'] is not None:
            st.write(f"**Average Experience Level:** {demo_summary['avg_experience']:.2f}/10")
            st.write(f"**Most Common Experience Group:** {demo_summary.get('most_common_experience', 'Unknown')}")


def _render_temporal_trends(df, sentiment_charts):
    """Render the sentiment timeline expander when timestamps are available."""
    if 'comment_timestamp' not in df.columns or df.empty:
        return

    freq_labels = {'D': 'Daily', 'W': 'Weekly', 'M': 'Monthly'}

    with st.expander("📈 Temporal Trends", expanded=False):
        # Narrow first column holds the selector; the second is a spacer.
        freq_col, _ = st.columns([1, 3])

        with freq_col:
            freq = st.selectbox(
                "Time Granularity",
                options=['D', 'W', 'M'],
                format_func=freq_labels.get,
                index=1  # Default to Weekly
            )

        sentiment_timeline = sentiment_charts.create_sentiment_timeline(
            df, freq=freq, title="Sentiment Trends Over Time"
        )
        st.plotly_chart(sentiment_timeline, use_container_width=True)