# Source: Hugging Face Space "MusoraProductDepartment/Sentiment_analysis" (status: Running)
# File size: 36,939 bytes
"""
PDF Report Exporter for Musora Sentiment Analysis Dashboard.

Generates a comprehensive PDF report from the filtered dashboard data and
Plotly visualizations.

Dependencies:
    fpdf2  — PDF assembly  (pip install fpdf2)
    kaleido — Plotly PNG rendering (pip install kaleido)
"""

import os
import sys
import tempfile
import logging
from datetime import datetime
from pathlib import Path

# Ensure the visualization package root is importable when this module is
# loaded directly (e.g., during testing outside Streamlit).
_parent = Path(__file__).resolve().parent.parent
if str(_parent) not in sys.path:
    sys.path.insert(0, str(_parent))

import plotly.io as pio
from fpdf import FPDF

from utils.metrics import SentimentMetrics
from utils.data_processor import SentimentDataProcessor
from visualizations.sentiment_charts import SentimentCharts
from visualizations.distribution_charts import DistributionCharts
from visualizations.demographic_charts import DemographicCharts

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Section descriptions — plain-language context shown below each section header.
# ---------------------------------------------------------------------------
# Maps a section key to the explanatory paragraph for that report section.
# The exporter's section builders look entries up by key (for example
# _DESCRIPTIONS["brand"]) and pass the text to MusoraPDF.section_description,
# which drops any character outside Latin-1 - keep the wording Latin-1 safe.
_DESCRIPTIONS = {
    # Shown under the "Executive Summary" heading.
    "executive_summary": (
        "A top-level snapshot of community sentiment across all Musora brands and platforms. "
        "All findings are based on comments processed through the AI sentiment analysis pipeline."
    ),
    # Shown under the "Sentiment Distribution" heading.
    "sentiment": (
        "Every comment is assigned one of five sentiment levels: "
        "Very Positive, Positive, Neutral, Negative, or Very Negative. "
        "The pie chart shows how those levels split across all analyzed comments. "
        "The Sentiment Score (0-100) converts the average rating to a percentage scale: "
        "50 = perfectly neutral, above 60 = primarily positive."
    ),
    # Shown under the "Sentiment by Brand" heading.
    "brand": (
        "Sentiment broken down by Musora brand (Drumeo, Pianote, Guitareo, Singeo, etc.). "
        "Shows both the count and percentage of each sentiment level per brand, "
        "helping identify which brands receive the most positive or negative feedback."
    ),
    # Shown under the "Sentiment by Platform" heading.
    "platform": (
        "Sentiment broken down by platform (Facebook, Instagram, YouTube, Twitter, Musora App). "
        "Helps compare audience sentiment across channels."
    ),
    # Shown under the "Intent Analysis" heading.
    "intent": (
        "Beyond positive/negative, the AI identifies the intent behind each comment: "
        "praise, questions, requests, feedback, suggestions, humor, off-topic, or spam. "
        "Understanding intent helps prioritize community management."
    ),
    # Shown under the "Cross-Dimensional Analysis" heading.
    "cross_dimensional": (
        "Cross-dimensional analysis reveals patterns across both brand and platform simultaneously. "
        "The heatmaps show comment volume and negative sentiment concentration by combination."
    ),
    # Shown under the "Volume Analysis" heading.
    "volume": (
        "Volume analysis shows the distribution of comments across platforms and brands, "
        "indicating where the most community engagement is happening."
    ),
    # Shown under the "Reply Requirements Analysis" heading.
    "reply_requirements": (
        "Comments flagged as requiring a reply, broken down by brand and platform. "
        "The urgency breakdown helps prioritize community management resources."
    ),
    # Shown under the "Demographics Analysis (Musora App)" heading.
    "demographics": (
        "Demographics data is available for Musora App comments and is derived from user profiles. "
        "Note: These charts reflect only users who have filled in their profile information - "
        "they do not represent all community members."
    ),
    # Presumably used by the language section builder - TODO confirm (not visible here).
    "language": (
        "Language distribution shows what languages comments are written in. "
        "Non-English comments are automatically translated for analysis."
    ),
}

# ---------------------------------------------------------------------------
# Musora brand colours
# ---------------------------------------------------------------------------
# Primary brand colour as a hex string (0x19, 0x82, 0xC4 == 25, 130, 196).
_PRIMARY_HEX = "#1982C4"
# The same colour as an (R, G, B) tuple; MusoraPDF.PRIMARY is bound to it.
_PRIMARY_RGB = (25, 130, 196)


# ---------------------------------------------------------------------------
# PDF document class
# ---------------------------------------------------------------------------

class MusoraPDF(FPDF):
    """Custom FPDF subclass with Musora branding and layout helpers.

    The document is A4 portrait with millimetre units.  Every helper that
    prints text first reduces it to Latin-1 (see ``_sanitize``) because the
    built-in Helvetica font is used throughout.
    """

    # RGB colour tuples used by the helpers below.
    PRIMARY = _PRIMARY_RGB
    WHITE = (255, 255, 255)
    GRAY = (180, 180, 180)
    LIGHT_GRAY = (240, 240, 240)

    # Bottom margin in mm.  Shared by the automatic page break configured in
    # __init__ and by check_page_break() so the two cannot drift apart.
    BOTTOM_MARGIN = 20

    def __init__(self):
        """Create an A4 portrait document with automatic page breaks enabled."""
        super().__init__(orientation="P", unit="mm", format="A4")
        self.set_auto_page_break(auto=True, margin=self.BOTTOM_MARGIN)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _sanitize(text: str) -> str:
        """Strip characters outside Latin-1 (required by the Helvetica font).

        Non-string input is converted with ``str()`` first.  Characters that
        cannot be encoded are dropped, not replaced.
        """
        if not isinstance(text, str):
            text = str(text)
        return text.encode("latin-1", errors="ignore").decode("latin-1")

    # ------------------------------------------------------------------
    # FPDF overrides
    # ------------------------------------------------------------------

    def header(self):
        """Draw the running header (title, page number, rule) on pages 2+.

        The first page is left without a header.
        """
        if self.page_no() > 1:
            self.set_font("Helvetica", "B", 8)
            self.set_text_color(*self.GRAY)
            self.cell(0, 6, "Musora Sentiment Analysis Report", align="L")
            self.cell(
                0, 6, f"Page {self.page_no()}", align="R",
                new_x="LMARGIN", new_y="NEXT",
            )
            self.set_draw_color(*self.PRIMARY)
            self.set_line_width(0.5)
            self.line(10, self.get_y(), 200, self.get_y())
            self.ln(4)

    def footer(self):
        """Draw the centred "Generated on ... | Confidential" footer line."""
        # Negative y positions the cursor relative to the bottom of the page.
        self.set_y(-15)
        self.set_font("Helvetica", "I", 7)
        self.set_text_color(*self.GRAY)
        self.cell(
            0, 10,
            f"Generated on {datetime.now().strftime('%Y-%m-%d %H:%M')} | Confidential",
            align="C",
        )

    # ------------------------------------------------------------------
    # Layout primitives
    # ------------------------------------------------------------------

    def check_page_break(self, needed_mm: float) -> None:
        """Add a page break if less than *needed_mm* mm remain on the page."""
        if self.get_y() + needed_mm > self.h - self.BOTTOM_MARGIN:
            self.add_page()

    def section_header(self, title: str) -> None:
        """Bold, brand-coloured section heading with an underline rule."""
        title = self._sanitize(title)
        self.check_page_break(20)
        self.ln(4)
        self.set_font("Helvetica", "B", 14)
        self.set_text_color(*self.PRIMARY)
        self.cell(0, 10, title, new_x="LMARGIN", new_y="NEXT")
        self.set_draw_color(*self.PRIMARY)
        self.set_line_width(0.3)
        self.line(10, self.get_y(), 200, self.get_y())
        self.ln(3)
        # Restore black so following text is not drawn in the brand colour.
        self.set_text_color(0, 0, 0)

    def subsection_header(self, title: str) -> None:
        """Lighter subsection heading."""
        title = self._sanitize(title)
        self.check_page_break(15)
        self.ln(2)
        self.set_font("Helvetica", "B", 11)
        self.set_text_color(60, 60, 60)
        self.cell(0, 8, title, new_x="LMARGIN", new_y="NEXT")
        self.ln(1)
        self.set_text_color(0, 0, 0)

    def section_description(self, text: str) -> None:
        """Italicised description block beneath a section header."""
        text = self._sanitize(text)
        self.set_font("Helvetica", "I", 9)
        self.set_text_color(80, 80, 80)
        self.multi_cell(0, 5, text)
        self.ln(4)
        self.set_text_color(0, 0, 0)

    def body_text(self, text: str) -> None:
        """Standard paragraph text."""
        text = self._sanitize(text)
        self.set_font("Helvetica", "", 9)
        self.set_text_color(50, 50, 50)
        self.multi_cell(0, 5, text)
        self.ln(2)
        self.set_text_color(0, 0, 0)

    def callout_box(
        self,
        text: str,
        bg_color: tuple = (240, 248, 255),
        border_color: tuple = None,
    ) -> None:
        """Lightly-coloured info/callout box with a left accent bar.

        Args:
            text: Box content; ``\\n`` starts a new line.
            bg_color: RGB fill colour of the box.
            border_color: RGB colour of the accent bar; defaults to PRIMARY.
        """
        if border_color is None:
            border_color = self.PRIMARY
        text = self._sanitize(text)
        x, w = 10, 180
        # The height is an estimate: roughly 90 characters per wrapped line
        # plus one line per explicit newline, never fewer than two lines.
        approx_lines = max(2, len(text) // 90 + text.count("\n") + 1)
        h = approx_lines * 5 + 6
        # Reserve room for the whole box and its trailing gap.  The previous
        # fixed 20 mm let tall boxes run past the bottom margin.
        self.check_page_break(max(20, h + 3))
        y = self.get_y()
        self.set_fill_color(*bg_color)
        self.rect(x, y, w, h, style="F")
        # 3 mm accent bar drawn over the left edge of the background.
        self.set_fill_color(*border_color)
        self.rect(x, y, 3, h, style="F")
        self.set_font("Helvetica", "", 8.5)
        self.set_text_color(40, 40, 40)
        self.set_xy(x + 5, y + 3)
        self.multi_cell(w - 7, 4.5, text)
        # Continue below the box regardless of where the text ended.
        self.set_y(y + h + 3)
        self.set_text_color(0, 0, 0)

    def metric_row(self, metrics: list) -> None:
        """
        Horizontal row of metric tiles.

        Args:
            metrics: list of (label, value) tuples.  An empty list draws
                nothing and never triggers a page break.
        """
        # Guard first: an empty row must not force a new page.
        if not metrics:
            return
        self.check_page_break(18)
        n = len(metrics)
        # Tiles share 190 mm with a 3 mm gap between neighbours.
        box_w = (190 - (n - 1) * 3) / n
        x0 = 10
        y = self.get_y()
        for i, (label, value) in enumerate(metrics):
            x = x0 + i * (box_w + 3)
            self.set_fill_color(245, 245, 245)
            self.rect(x, y, box_w, 14, style="F")
            # Value on top in the brand colour ...
            self.set_xy(x, y + 1)
            self.set_font("Helvetica", "B", 10)
            self.set_text_color(*self.PRIMARY)
            self.cell(box_w, 6, self._sanitize(str(value)), align="C")
            # ... label underneath in small grey type.
            self.set_xy(x, y + 7)
            self.set_font("Helvetica", "", 7)
            self.set_text_color(100, 100, 100)
            self.cell(box_w, 5, self._sanitize(str(label)), align="C")
        self.set_text_color(0, 0, 0)
        self.set_y(y + 16)

    def add_table(
        self,
        headers: list,
        rows: list,
        col_widths: list = None,
    ) -> None:
        """
        Styled data table with alternating row shading.

        Args:
            headers: Column header strings.
            rows: List of row tuples/lists.
            col_widths: Optional column widths in mm.  When omitted, 190 mm
                is split evenly across the headers; with no headers there is
                nothing to size, so the call draws nothing.
        """
        n = len(headers)
        if col_widths is None:
            if n == 0:
                # Previously a ZeroDivisionError.
                return
            col_widths = [190 / n] * n
        self.check_page_break(10 + len(rows) * 6)
        # Header
        self.set_font("Helvetica", "B", 8)
        self.set_fill_color(*self.PRIMARY)
        self.set_text_color(*self.WHITE)
        for i, hdr in enumerate(headers):
            self.cell(col_widths[i], 7, self._sanitize(hdr), border=1, fill=True, align="C")
        self.ln()
        # Rows
        self.set_font("Helvetica", "", 8)
        self.set_text_color(0, 0, 0)
        for row_idx, row in enumerate(rows):
            # Even rows get a faint grey fill, odd rows stay white.
            if row_idx % 2 == 0:
                self.set_fill_color(250, 250, 250)
            else:
                self.set_fill_color(*self.WHITE)
            for i, cell_val in enumerate(row):
                self.cell(col_widths[i], 6, self._sanitize(str(cell_val)), border=1, fill=True, align="C")
            self.ln()
        self.ln(2)


# ---------------------------------------------------------------------------
# Main exporter
# ---------------------------------------------------------------------------

class DashboardPDFExporter:
    """
    Generates a comprehensive PDF report from the Musora Sentiment dashboard.

    Usage::

        exporter = DashboardPDFExporter()
        pdf_bytes = exporter.generate_report(filtered_df, filter_info)

    The *filter_info* dict (optional) maps human-readable filter names to their
    selected values and is shown on the cover page.
    """

    # Kaleido scale factor: 3× ≈ 300 DPI at A4 print size.
    RENDER_SCALE = 3

    def __init__(self):
        """Set up the chart builders, the data processor and temp-file tracking."""
        # Paths of chart PNGs written while building a report; they are
        # deleted again by _cleanup_temp_files().
        self._temp_files: list[str] = []

        # Chart factories, one per chart family.
        self.sentiment_charts = SentimentCharts()
        self.distribution_charts = DistributionCharts()
        self.demographic_charts = DemographicCharts()

        self.processor = SentimentDataProcessor()

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    def generate_report(self, df, filter_info: dict = None) -> bytes:
        """
        Assemble every report section and return the finished PDF.

        A fresh MusoraPDF document is created on each call.  The demographics
        and language sections are only added when the data supports them.
        Temporary chart images are removed when the call ends, whether it
        succeeds or raises.

        Args:
            df: Filtered dashboard DataFrame.
            filter_info: Optional dict of active filter descriptions shown on
                         the cover page, e.g. {"Platforms": ["facebook"],
                         "Brands": ["drumeo"]}.

        Returns:
            bytes: Raw PDF file contents ready for st.download_button.
        """
        self.pdf = MusoraPDF()
        try:
            self._add_cover_page(df, filter_info)

            # Sections that only need the DataFrame, in report order.
            for build_section in (
                self._add_executive_summary,
                self._add_sentiment_section,
                self._add_brand_section,
                self._add_platform_section,
                self._add_intent_section,
                self._add_cross_dimensional_section,
                self._add_volume_section,
                self._add_reply_requirements_section,
            ):
                build_section(df)

            # Optional sections.
            if self._has_demographics(df):
                self._add_demographics_section(df)
            if "detected_language" in df.columns:
                self._add_language_section(df)

            self._add_data_summary(df, filter_info)
            return bytes(self.pdf.output())
        finally:
            self._cleanup_temp_files()

    # ------------------------------------------------------------------
    # Chart rendering helpers
    # ------------------------------------------------------------------

    def _prepare_fig_for_pdf(self, fig, is_side_by_side: bool = False) -> None:
        """Apply white background, readable fonts, and automargin to a Plotly figure."""
        base_fs = 13 if is_side_by_side else 14
        fig.update_layout(
            paper_bgcolor="white",
            plot_bgcolor="white",
            font=dict(color="black", size=base_fs),
            title_font_size=base_fs + 4,
            margin=(
                dict(l=60, r=40, t=60, b=60)
                if is_side_by_side
                else dict(l=80, r=40, t=60, b=80)
            ),
        )
        fig.update_xaxes(automargin=True)
        fig.update_yaxes(automargin=True)
        if fig.layout.showlegend is not False:
            fig.update_layout(legend_font_size=base_fs - 2)

    def _fig_to_temp_path(
        self, fig, width: int = 800, height: int = 400, is_side_by_side: bool = False
    ) -> str:
        """Render a Plotly figure to a temporary high-DPI PNG and return the path.

        The figure is restyled in place by ``_prepare_fig_for_pdf`` before it
        is rendered with the kaleido engine at ``RENDER_SCALE``.

        Args:
            fig: Plotly figure to render.
            width: Render width passed to ``pio.to_image``.
            height: Render height passed to ``pio.to_image``.
            is_side_by_side: Forwarded to ``_prepare_fig_for_pdf``.

        Returns:
            Path of the PNG file.  The path is also recorded in
            ``self._temp_files`` so ``_cleanup_temp_files`` can delete it.
        """
        self._prepare_fig_for_pdf(fig, is_side_by_side=is_side_by_side)
        img_bytes = pio.to_image(
            fig,
            format="png",
            width=width,
            height=height,
            scale=self.RENDER_SCALE,
            engine="kaleido",
        )
        # delete=False keeps the file after the handle closes, because the
        # PDF writer opens it by path later.  The context manager closes the
        # handle even if the write fails.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            # Register before writing so a failed write still gets cleaned up.
            self._temp_files.append(tmp.name)
            tmp.write(img_bytes)
        return tmp.name

    def _add_chart(self, fig, width: int = 180, img_width: int = 800, img_height: int = 400) -> None:
        """Render one figure full-width on the current PDF page."""
        try:
            path = self._fig_to_temp_path(fig, img_width, img_height)
            h_mm = width * (img_height / img_width)
            self.pdf.check_page_break(h_mm + 5)
            self.pdf.image(path, x=10, w=width)
            self.pdf.ln(3)
        except Exception:
            logger.exception("Chart render failed")
            self.pdf.body_text("[Chart could not be rendered]")

    def _add_two_charts(self, fig1, fig2, width: int = 92) -> None:
        """Render two figures side-by-side."""
        try:
            p1 = self._fig_to_temp_path(fig1, 700, 450, is_side_by_side=True)
            p2 = self._fig_to_temp_path(fig2, 700, 450, is_side_by_side=True)
            h_mm = width * (450 / 700)
            self.pdf.check_page_break(h_mm + 5)
            y = self.pdf.get_y()
            self.pdf.image(p1, x=10, y=y, w=width)
            self.pdf.image(p2, x=10 + width + 4, y=y, w=width)
            self.pdf.set_y(y + h_mm + 3)
        except Exception:
            logger.exception("Side-by-side chart render failed")
            self.pdf.body_text("[Charts could not be rendered]")

    def _cleanup_temp_files(self) -> None:
        for path in self._temp_files:
            try:
                os.unlink(path)
            except OSError:
                pass
        self._temp_files.clear()

    # ------------------------------------------------------------------
    # Data helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _has_demographics(df) -> bool:
        return (
            "platform" in df.columns
            and "musora_app" in df["platform"].values
            and "age_group" in df.columns
            and "timezone" in df.columns
            and "experience_level" in df.columns
        )

    @staticmethod
    def _filter_summary(filter_info: dict) -> str:
        if not filter_info:
            return "No filters applied - showing all data."
        parts = []
        for key, value in filter_info.items():
            if value:
                display = (
                    value if isinstance(value, str)
                    else ", ".join(str(v) for v in value)
                )
                parts.append(f"{key}: {display}")
        return "; ".join(parts) if parts else "No filters applied."

    @staticmethod
    def _date_range_str(df) -> str:
        if "comment_timestamp" not in df.columns or df.empty:
            return "N/A"
        valid = df["comment_timestamp"].dropna()
        if valid.empty:
            return "N/A"
        return (
            f"{valid.min().strftime('%b %d, %Y')} to {valid.max().strftime('%b %d, %Y')}"
        )

    # ------------------------------------------------------------------
    # Report sections
    # ------------------------------------------------------------------

    def _add_cover_page(self, df, filter_info: dict) -> None:
        """Add the cover page: title, generation time, totals and active filters.

        Args:
            df: Filtered dashboard DataFrame; its row count and
                ``comment_timestamp`` range are printed.
            filter_info: Optional mapping of filter name to selected value(s).
                Entries with a falsy value are skipped.  A string is printed
                as-is, another iterable is joined with ", ", and a
                non-iterable value (number, date, ...) is printed via
                ``str()``.
        """
        pdf = self.pdf

        def centered_line(height, text):
            """Print one centred full-width line and move to the next line."""
            pdf.cell(0, height, text, align="C", new_x="LMARGIN", new_y="NEXT")

        pdf.add_page()
        pdf.ln(40)

        # Full-width brand-coloured band across the page.
        r, g, b = MusoraPDF.PRIMARY
        pdf.set_fill_color(r, g, b)
        pdf.rect(0, 60, 210, 4, style="F")

        # Title block.
        pdf.ln(20)
        pdf.set_font("Helvetica", "B", 28)
        pdf.set_text_color(r, g, b)
        centered_line(15, "Musora")

        pdf.set_font("Helvetica", "", 16)
        pdf.set_text_color(80, 80, 80)
        centered_line(10, "Sentiment Analysis Report")

        # Short rule under the title.
        pdf.ln(10)
        pdf.set_draw_color(r, g, b)
        pdf.set_line_width(0.5)
        rule_y = pdf.get_y()
        pdf.line(60, rule_y, 150, rule_y)
        pdf.ln(10)

        pdf.set_font("Helvetica", "", 12)
        pdf.set_text_color(100, 100, 100)
        centered_line(8, f"Generated: {datetime.now().strftime('%B %d, %Y at %H:%M')}")

        pdf.ln(5)
        pdf.set_font("Helvetica", "", 10)
        centered_line(7, f"Total Comments Analyzed: {len(df):,}")

        # The data period is only shown when a date range could be derived.
        date_str = self._date_range_str(df)
        if date_str != "N/A":
            pdf.ln(3)
            pdf.set_font("Helvetica", "I", 9)
            pdf.set_text_color(120, 120, 120)
            centered_line(6, MusoraPDF._sanitize(f"Data period: {date_str}"))

        if filter_info:
            pdf.ln(8)
            pdf.set_font("Helvetica", "B", 9)
            pdf.set_text_color(80, 80, 80)
            centered_line(6, "Active Filters:")
            pdf.set_font("Helvetica", "", 9)
            for key, value in filter_info.items():
                if not value:
                    continue
                if isinstance(value, str):
                    display = value
                else:
                    try:
                        display = ", ".join(str(v) for v in value)
                    except TypeError:
                        # Scalar filter value: previously crashed the export.
                        display = str(value)
                centered_line(5, MusoraPDF._sanitize(f"{key}: {display}"))

        # Closing notes.
        pdf.ln(20)
        pdf.set_font("Helvetica", "I", 8)
        pdf.set_text_color(150, 150, 150)
        centered_line(6, "Confidential - For Internal Use Only")
        centered_line(6, "Data Source: Snowflake | Musora Sentiment Pipeline")

    def _add_executive_summary(self, df) -> None:
        """Add the Executive Summary page.

        The page holds an opening narrative, a health label, two rows of
        metric tiles, a note on how to read the score, and the key findings.
        The health label comes from the negative share: below 20% is
        "Healthy", below 35% is "Moderate", otherwise "Needs Attention".
        """
        pdf = self.pdf
        pdf.add_page()
        pdf.section_header("Executive Summary")
        pdf.section_description(_DESCRIPTIONS["executive_summary"])

        stats = SentimentMetrics.calculate_overall_metrics(df)
        # Map the -2..+2 average onto 0..100.
        score_0_100 = ((stats["avg_sentiment_score"] + 2) / 4) * 100

        # Health label
        negative_share = stats["negative_pct"]
        if negative_share < 20:
            health = "Healthy"
        elif negative_share < 35:
            health = "Moderate"
        else:
            health = "Needs Attention"

        def distinct_sorted(column):
            """Sorted non-null unique values of *column*, or [] if it is absent."""
            if column not in df.columns:
                return []
            return sorted(df[column].dropna().unique().tolist())

        # Opening narrative (names at most six brands / platforms).
        brands = distinct_sorted("brand")
        platforms = distinct_sorted("platform")
        if brands:
            brands_str = ", ".join(str(b).title() for b in brands[:6])
        else:
            brands_str = "all brands"
        if platforms:
            platforms_str = ", ".join(str(p).title() for p in platforms[:6])
        else:
            platforms_str = "all platforms"

        pdf.body_text(
            f"This report analyzes {stats['total_comments']:,} comments across {brands_str} "
            f"on {platforms_str}. "
            f"Overall sentiment is {stats['positive_pct']:.1f}% positive and "
            f"{stats['negative_pct']:.1f}% negative, "
            f"with {stats['reply_required_pct']:.1f}% of comments requiring a reply."
        )

        # Health status
        pdf.set_font("Helvetica", "B", 11)
        pdf.set_text_color(*MusoraPDF.PRIMARY)
        pdf.cell(
            0, 8, f"Overall Sentiment Health: {health}",
            new_x="LMARGIN", new_y="NEXT",
        )
        pdf.ln(2)
        pdf.set_text_color(0, 0, 0)

        # Metric tiles - two rows
        sentiment_tiles = [
            ("Total Comments", f"{stats['total_comments']:,}"),
            ("Positive %", f"{stats['positive_pct']:.1f}%"),
            ("Negative %", f"{stats['negative_pct']:.1f}%"),
            ("Sentiment Score", f"{score_0_100:.0f}/100"),
        ]
        coverage_tiles = [
            ("Reply Required", f"{stats['total_reply_required']:,}"),
            ("Reply Rate %", f"{stats['reply_required_pct']:.1f}%"),
            ("Brands Analyzed", str(len(brands))),
            ("Platforms Analyzed", str(len(platforms))),
        ]
        pdf.metric_row(sentiment_tiles)
        pdf.metric_row(coverage_tiles)

        # Score explanation
        pdf.ln(2)
        pdf.callout_box(
            "How to read the Sentiment Score:\n"
            "Each comment is rated Very Positive (+2), Positive (+1), Neutral (0), "
            "Negative (-1), or Very Negative (-2). "
            "The Score (0-100) converts the average: 50 = perfectly neutral, "
            "above 60 = primarily positive, below 40 = primarily negative.",
        )

        # Key findings, one bullet paragraph each
        pdf.subsection_header("Key Findings")
        for finding in self._generate_key_findings(df, stats):
            pdf.body_text(f"  *  {finding}")

    def _generate_key_findings(self, df, metrics: dict) -> list:
        findings = []

        # Sentiment summary
        if metrics["positive_pct"] > 50:
            findings.append(
                f"Sentiment is predominantly positive at {metrics['positive_pct']:.1f}%."
            )
        elif metrics["negative_pct"] > 30:
            findings.append(
                f"Negative sentiment is elevated at {metrics['negative_pct']:.1f}% - "
                f"consider targeted community management."
            )
        else:
            findings.append(
                f"Sentiment is balanced: {metrics['positive_pct']:.1f}% positive, "
                f"{metrics['negative_pct']:.1f}% negative."
            )

        # Top brand by volume
        if "brand" in df.columns and not df.empty:
            top_brand = df["brand"].value_counts().index[0]
            top_count = df["brand"].value_counts().iloc[0]
            findings.append(
                f"Most discussed brand: {str(top_brand).title()} "
                f"({top_count:,} comments, {top_count / len(df) * 100:.1f}% of total)."
            )

        # Reply urgency
        if metrics["reply_required_pct"] > 10:
            findings.append(
                f"{metrics['total_reply_required']:,} comments "
                f"({metrics['reply_required_pct']:.1f}%) require a reply."
            )

        # Top platform by volume
        if "platform" in df.columns and not df.empty:
            top_platform = df["platform"].value_counts().index[0]
            plat_count = df["platform"].value_counts().iloc[0]
            findings.append(
                f"Most active platform: {str(top_platform).title()} "
                f"({plat_count:,} comments)."
            )

        return findings[:4]

    def _add_sentiment_section(self, df) -> None:
        """Add the Sentiment Distribution page: pie chart, score gauge and a summary line.

        The neutral share in the summary is whatever remains after the
        positive and negative percentages.
        """
        pdf = self.pdf
        pdf.add_page()
        pdf.section_header("Sentiment Distribution")
        pdf.section_description(_DESCRIPTIONS["sentiment"])

        stats = SentimentMetrics.calculate_overall_metrics(df)
        raw_average = stats["avg_sentiment_score"]
        # Map the -2..+2 average onto 0..100.
        score_0_100 = ((raw_average + 2) / 4) * 100

        charts = self.sentiment_charts
        distribution_pie = charts.create_sentiment_pie_chart(df, title="Sentiment Distribution")
        score_gauge = charts.create_sentiment_score_gauge(
            raw_average, title="Overall Sentiment Score"
        )
        self._add_two_charts(distribution_pie, score_gauge)

        neutral_pct = 100 - stats['positive_pct'] - stats['negative_pct']
        summary = (
            f"Across {stats['total_comments']:,} analyzed comments: "
            f"{stats['positive_pct']:.1f}% positive, "
            f"{neutral_pct:.1f}% neutral, "
            f"{stats['negative_pct']:.1f}% negative. "
            f"Sentiment Score: {score_0_100:.0f}/100 "
            f"(raw average: {stats['avg_sentiment_score']:.2f} on a -2 to +2 scale)."
        )
        pdf.body_text(summary)

    def _add_brand_section(self, df) -> None:
        """Add the Sentiment by Brand page: count and percentage charts plus a metrics table.

        Nothing is added when the ``brand`` column is missing or holds no
        distinct values.  Table rows are ordered by brand key.
        """
        if "brand" not in df.columns:
            return
        if df["brand"].nunique() == 0:
            return

        pdf = self.pdf
        pdf.add_page()
        pdf.section_header("Sentiment by Brand")
        pdf.section_description(_DESCRIPTIONS["brand"])

        charts = self.sentiment_charts
        counts_chart = charts.create_sentiment_bar_chart(
            df, group_by="brand", title="Sentiment Distribution by Brand"
        )
        share_chart = charts.create_sentiment_percentage_bar_chart(
            df, group_by="brand", title="Sentiment by Brand (%)"
        )
        self._add_two_charts(counts_chart, share_chart)

        def table_row(name, m):
            """Format one brand's metrics as a row of display strings."""
            # Map the -2..+2 average onto 0..100.
            score = ((m["avg_sentiment_score"] + 2) / 4) * 100
            return (
                str(name).title(),
                f"{m['total_comments']:,}",
                f"{m['positive_pct']:.1f}%",
                f"{m['negative_pct']:.1f}%",
                f"{m['reply_required_pct']:.1f}%",
                f"{score:.0f}/100",
            )

        # Summary table
        per_brand = SentimentMetrics.calculate_brand_metrics(df)
        table_rows = [table_row(name, m) for name, m in sorted(per_brand.items())]
        pdf.subsection_header("Brand Metrics Summary")
        pdf.add_table(
            headers=["Brand", "Comments", "Positive %", "Negative %", "Reply Rate", "Score"],
            rows=table_rows,
            col_widths=[38, 32, 30, 30, 30, 30],
        )

    def _add_platform_section(self, df) -> None:
        """Add the Sentiment by Platform page: count and percentage charts plus a metrics table.

        Nothing is added when the ``platform`` column is missing or holds no
        distinct values.  Table rows are ordered by platform key.
        """
        if "platform" not in df.columns:
            return
        if df["platform"].nunique() == 0:
            return

        pdf = self.pdf
        pdf.add_page()
        pdf.section_header("Sentiment by Platform")
        pdf.section_description(_DESCRIPTIONS["platform"])

        charts = self.sentiment_charts
        counts_chart = charts.create_sentiment_bar_chart(
            df, group_by="platform", title="Sentiment Distribution by Platform"
        )
        share_chart = charts.create_sentiment_percentage_bar_chart(
            df, group_by="platform", title="Sentiment by Platform (%)"
        )
        self._add_two_charts(counts_chart, share_chart)

        def table_row(name, m):
            """Format one platform's metrics as a row of display strings."""
            # Map the -2..+2 average onto 0..100.
            score = ((m["avg_sentiment_score"] + 2) / 4) * 100
            return (
                str(name).title(),
                f"{m['total_comments']:,}",
                f"{m['positive_pct']:.1f}%",
                f"{m['negative_pct']:.1f}%",
                f"{m['reply_required_pct']:.1f}%",
                f"{score:.0f}/100",
            )

        # Summary table
        per_platform = SentimentMetrics.calculate_platform_metrics(df)
        table_rows = [table_row(name, m) for name, m in sorted(per_platform.items())]
        pdf.subsection_header("Platform Metrics Summary")
        pdf.add_table(
            headers=["Platform", "Comments", "Positive %", "Negative %", "Reply Rate", "Score"],
            rows=table_rows,
            col_widths=[38, 32, 30, 30, 30, 30],
        )

    def _add_intent_section(self, df) -> None:
        """Add the Intent Analysis page with a horizontal bar chart and a pie chart.

        Nothing is added when *df* has no ``intent`` column.
        """
        has_intent = "intent" in df.columns
        if not has_intent:
            return

        pdf = self.pdf
        pdf.add_page()
        pdf.section_header("Intent Analysis")
        pdf.section_description(_DESCRIPTIONS["intent"])

        charts = self.distribution_charts
        bar_fig = charts.create_intent_bar_chart(
            df, title="Intent Distribution", orientation="h"
        )
        pie_fig = charts.create_intent_pie_chart(df, title="Intent Distribution")
        self._add_two_charts(bar_fig, pie_fig)

    def _add_cross_dimensional_section(self, df) -> None:
        """Add the Cross-Dimensional Analysis page: brand/platform matrix and sentiment heatmap.

        Both the ``brand`` and ``platform`` columns are required; otherwise
        nothing is added.
        """
        if not ("brand" in df.columns and "platform" in df.columns):
            return

        pdf = self.pdf
        pdf.add_page()
        pdf.section_header("Cross-Dimensional Analysis")
        pdf.section_description(_DESCRIPTIONS["cross_dimensional"])

        volume_matrix = self.distribution_charts.create_brand_platform_matrix(
            df, title="Brand-Platform Comment Matrix"
        )
        heatmap_options = dict(
            row_dimension="brand",
            col_dimension="platform",
            title="Negative Sentiment Heatmap",
        )
        negative_heatmap = self.sentiment_charts.create_sentiment_heatmap(df, **heatmap_options)
        self._add_two_charts(volume_matrix, negative_heatmap)

    def _add_volume_section(self, df) -> None:
        """Add the Volume Analysis page with comment counts by platform and/or brand.

        With both columns present the two charts sit side by side.  With only
        one, that chart is drawn full width.  With neither, nothing is added.
        """
        platform_available = "platform" in df.columns
        brand_available = "brand" in df.columns
        if not (platform_available or brand_available):
            return

        pdf = self.pdf
        pdf.add_page()
        pdf.section_header("Volume Analysis")
        pdf.section_description(_DESCRIPTIONS["volume"])

        # Build only the charts the data supports, platform first.
        charts = self.distribution_charts
        figures = []
        if platform_available:
            figures.append(
                charts.create_platform_distribution(df, title="Comments by Platform")
            )
        if brand_available:
            figures.append(
                charts.create_brand_distribution(df, title="Comments by Brand")
            )

        if len(figures) == 2:
            self._add_two_charts(figures[0], figures[1])
        else:
            self._add_chart(figures[0])

    def _add_reply_requirements_section(self, df) -> None:
        if "requires_reply" not in df.columns:
            return

        self.pdf.add_page()
        self.pdf.section_header("Reply Requirements Analysis")
        self.pdf.section_description(_DESCRIPTIONS["reply_requirements"])

        urgency = SentimentMetrics.calculate_response_urgency(df)
        self.pdf.metric_row([
            ("Urgent", str(urgency["urgent_count"])),
            ("High Priority", str(urgency["high_priority_count"])),
            ("Medium Priority", str(urgency["medium_priority_count"])),
            ("Low Priority", str(urgency["low_priority_count"])),
        ])
        self.pdf.ln(3)

        has_brand = "brand" in df.columns
        has_platform = "platform" in df.columns
        if has_brand and has_platform:
            reply_brand = self.distribution_charts.create_reply_required_chart(
                df, group_by="brand", title="Comments Requiring Reply by Brand"
            )
            reply_platform = self.distribution_charts.create_reply_required_chart(
                df, group_by="platform", title="Comments Requiring Reply by Platform"
            )
            self._add_two_charts(reply_brand, reply_platform)
        elif has_brand:
            self._add_chart(
                self.distribution_charts.create_reply_required_chart(
                    df, group_by="brand", title="Comments Requiring Reply by Brand"
                )
            )

    def _add_demographics_section(self, df) -> None:
        df_musora = df[df["platform"] == "musora_app"].copy()
        if df_musora.empty:
            return

        self.pdf.add_page()
        self.pdf.section_header("Demographics Analysis (Musora App)")
        self.pdf.section_description(_DESCRIPTIONS["demographics"])
        self.pdf.body_text(f"Analyzing demographics for {len(df_musora):,} Musora App comments.")

        # Age
        age_dist = self.processor.get_demographics_distribution(df_musora, "age_group")
        if not age_dist.empty:
            self.pdf.subsection_header("Age Distribution")
            self._add_chart(
                self.demographic_charts.create_age_distribution_chart(
                    age_dist, title="Comments by Age Group"
                ),
                img_height=350,
            )

        # Region
        region_dist = self.processor.get_timezone_regions_distribution(df_musora)
        if not region_dist.empty:
            self.pdf.subsection_header("Geographic Distribution")
            self._add_chart(
                self.demographic_charts.create_region_distribution_chart(
                    region_dist, title="Comments by Region"
                ),
                img_height=350,
            )

        # Experience
        exp_dist = self.processor.get_experience_level_distribution(df_musora, use_groups=True)
        if not exp_dist.empty:
            self.pdf.subsection_header("Experience Level Distribution")
            self._add_chart(
                self.demographic_charts.create_experience_distribution_chart(
                    exp_dist, title="Comments by Experience Group", use_groups=True
                ),
                img_height=350,
            )

    def _add_language_section(self, df) -> None:
        """Append the "Language Distribution" page to the report.

        Starts a new page, writes the section header and description, then
        builds the language distribution figure (requested with
        ``top_n=10``) and passes it to ``_add_chart``.

        Args:
            df: Filtered comment data forwarded to the chart builder.
        """
        pdf = self.pdf
        pdf.add_page()
        pdf.section_header("Language Distribution")
        pdf.section_description(_DESCRIPTIONS["language"])

        language_figure = self.distribution_charts.create_language_distribution(
            df,
            top_n=10,
            title="Top 10 Languages",
        )
        self._add_chart(language_figure)

    def _add_data_summary(self, df, filter_info: dict) -> None:
        self.pdf.add_page()
        self.pdf.section_header("Data Summary")

        self.pdf.body_text(
            f"Report generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )
        self.pdf.body_text(f"Total records in report: {len(df):,}")

        date_str = self._date_range_str(df)
        if date_str != "N/A":
            self.pdf.body_text(f"Data range: {date_str}")

        self.pdf.body_text(f"Active filters: {self._filter_summary(filter_info)}")

        if "brand" in df.columns:
            brands = sorted(str(b).title() for b in df["brand"].dropna().unique())
            self.pdf.body_text(f"Brands included: {', '.join(brands)}")

        if "platform" in df.columns:
            platforms = sorted(str(p).title() for p in df["platform"].dropna().unique())
            self.pdf.body_text(f"Platforms included: {', '.join(platforms)}")

        self.pdf.ln(5)
        self.pdf.callout_box(
            "Data source: Snowflake - SOCIAL_MEDIA_DB.ML_FEATURES.COMMENT_SENTIMENT_FEATURES "
            "and SOCIAL_MEDIA_DB.ML_FEATURES.MUSORA_COMMENT_SENTIMENT_FEATURES.\n"
            "This report is confidential and intended for internal Musora team use only.",
            bg_color=(245, 245, 245),
        )