""" PDF Report Exporter for Musora Sentiment Analysis Dashboard. Generates a comprehensive PDF report from the filtered dashboard data and Plotly visualizations. Dependencies: fpdf2 — PDF assembly (pip install fpdf2) kaleido — Plotly PNG rendering (pip install kaleido) """ import os import sys import tempfile import logging from datetime import datetime from pathlib import Path # Ensure the visualization package root is importable when this module is # loaded directly (e.g., during testing outside Streamlit). _parent = Path(__file__).resolve().parent.parent if str(_parent) not in sys.path: sys.path.insert(0, str(_parent)) import plotly.io as pio from fpdf import FPDF from utils.metrics import SentimentMetrics from utils.data_processor import SentimentDataProcessor from visualizations.sentiment_charts import SentimentCharts from visualizations.distribution_charts import DistributionCharts from visualizations.demographic_charts import DemographicCharts logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Section descriptions — plain-language context shown below each section header. # --------------------------------------------------------------------------- _DESCRIPTIONS = { "executive_summary": ( "A top-level snapshot of community sentiment across all Musora brands and platforms. " "All findings are based on comments processed through the AI sentiment analysis pipeline." ), "sentiment": ( "Every comment is assigned one of five sentiment levels: " "Very Positive, Positive, Neutral, Negative, or Very Negative. " "The pie chart shows how those levels split across all analyzed comments. " "The Sentiment Score (0-100) converts the average rating to a percentage scale: " "50 = perfectly neutral, above 60 = primarily positive." ), "brand": ( "Sentiment broken down by Musora brand (Drumeo, Pianote, Guitareo, Singeo, etc.). " "Shows both the count and percentage of each sentiment level per brand, " "helping identify which brands receive the most positive or negative feedback." ), "platform": ( "Sentiment broken down by platform (Facebook, Instagram, YouTube, Twitter, Musora App). " "Helps compare audience sentiment across channels." ), "intent": ( "Beyond positive/negative, the AI identifies the intent behind each comment: " "praise, questions, requests, feedback, suggestions, humor, off-topic, or spam. " "Understanding intent helps prioritize community management." ), "cross_dimensional": ( "Cross-dimensional analysis reveals patterns across both brand and platform simultaneously. " "The heatmaps show comment volume and negative sentiment concentration by combination." ), "volume": ( "Volume analysis shows the distribution of comments across platforms and brands, " "indicating where the most community engagement is happening." ), "reply_requirements": ( "Comments flagged as requiring a reply, broken down by brand and platform. " "The urgency breakdown helps prioritize community management resources." ), "demographics": ( "Demographics data is available for Musora App comments and is derived from user profiles. " "Note: These charts reflect only users who have filled in their profile information - " "they do not represent all community members." ), "language": ( "Language distribution shows what languages comments are written in. " "Non-English comments are automatically translated for analysis." ), } # --------------------------------------------------------------------------- # Musora brand colours # --------------------------------------------------------------------------- _PRIMARY_HEX = "#1982C4" _PRIMARY_RGB = (25, 130, 196) # --------------------------------------------------------------------------- # PDF document class # --------------------------------------------------------------------------- class MusoraPDF(FPDF): """Custom FPDF subclass with Musora branding and layout helpers.""" PRIMARY = _PRIMARY_RGB WHITE = (255, 255, 255) GRAY = (180, 180, 180) LIGHT_GRAY = (240, 240, 240) def __init__(self): super().__init__(orientation="P", unit="mm", format="A4") self.set_auto_page_break(auto=True, margin=20) # ------------------------------------------------------------------ # Helpers # ------------------------------------------------------------------ @staticmethod def _sanitize(text: str) -> str: """Strip characters outside Latin-1 (required by the Helvetica font).""" if not isinstance(text, str): text = str(text) return text.encode("latin-1", errors="ignore").decode("latin-1") # ------------------------------------------------------------------ # FPDF overrides # ------------------------------------------------------------------ def header(self): if self.page_no() > 1: self.set_font("Helvetica", "B", 8) self.set_text_color(*self.GRAY) self.cell(0, 6, "Musora Sentiment Analysis Report", align="L") self.cell( 0, 6, f"Page {self.page_no()}", align="R", new_x="LMARGIN", new_y="NEXT", ) self.set_draw_color(*self.PRIMARY) self.set_line_width(0.5) self.line(10, self.get_y(), 200, self.get_y()) self.ln(4) def footer(self): self.set_y(-15) self.set_font("Helvetica", "I", 7) self.set_text_color(*self.GRAY) self.cell( 0, 10, f"Generated on {datetime.now().strftime('%Y-%m-%d %H:%M')} | Confidential", align="C", ) # ------------------------------------------------------------------ # Layout primitives # ------------------------------------------------------------------ def check_page_break(self, needed_mm: float) -> None: """Add a page break if less than *needed_mm* mm remain on the page.""" if self.get_y() + needed_mm > self.h - 20: self.add_page() def section_header(self, title: str) -> None: """Bold, brand-coloured section heading with an underline rule.""" title = self._sanitize(title) self.check_page_break(20) self.ln(4) self.set_font("Helvetica", "B", 14) self.set_text_color(*self.PRIMARY) self.cell(0, 10, title, new_x="LMARGIN", new_y="NEXT") self.set_draw_color(*self.PRIMARY) self.set_line_width(0.3) self.line(10, self.get_y(), 200, self.get_y()) self.ln(3) self.set_text_color(0, 0, 0) def subsection_header(self, title: str) -> None: """Lighter subsection heading.""" title = self._sanitize(title) self.check_page_break(15) self.ln(2) self.set_font("Helvetica", "B", 11) self.set_text_color(60, 60, 60) self.cell(0, 8, title, new_x="LMARGIN", new_y="NEXT") self.ln(1) self.set_text_color(0, 0, 0) def section_description(self, text: str) -> None: """Italicised description block beneath a section header.""" text = self._sanitize(text) self.set_font("Helvetica", "I", 9) self.set_text_color(80, 80, 80) self.multi_cell(0, 5, text) self.ln(4) self.set_text_color(0, 0, 0) def body_text(self, text: str) -> None: """Standard paragraph text.""" text = self._sanitize(text) self.set_font("Helvetica", "", 9) self.set_text_color(50, 50, 50) self.multi_cell(0, 5, text) self.ln(2) self.set_text_color(0, 0, 0) def callout_box( self, text: str, bg_color: tuple = (240, 248, 255), border_color: tuple = None, ) -> None: """Lightly-coloured info/callout box with a left accent bar.""" if border_color is None: border_color = self.PRIMARY text = self._sanitize(text) self.check_page_break(20) x, w = 10, 180 approx_lines = max(2, len(text) // 90 + text.count("\n") + 1) h = approx_lines * 5 + 6 y = self.get_y() self.set_fill_color(*bg_color) self.rect(x, y, w, h, style="F") self.set_fill_color(*border_color) self.rect(x, y, 3, h, style="F") self.set_font("Helvetica", "", 8.5) self.set_text_color(40, 40, 40) self.set_xy(x + 5, y + 3) self.multi_cell(w - 7, 4.5, text) self.set_y(y + h + 3) self.set_text_color(0, 0, 0) def metric_row(self, metrics: list) -> None: """ Horizontal row of metric tiles. Args: metrics: list of (label, value) tuples. """ self.check_page_break(18) n = len(metrics) if n == 0: return box_w = (190 - (n - 1) * 3) / n x0 = 10 y = self.get_y() for i, (label, value) in enumerate(metrics): x = x0 + i * (box_w + 3) self.set_fill_color(245, 245, 245) self.rect(x, y, box_w, 14, style="F") self.set_xy(x, y + 1) self.set_font("Helvetica", "B", 10) self.set_text_color(*self.PRIMARY) self.cell(box_w, 6, self._sanitize(str(value)), align="C") self.set_xy(x, y + 7) self.set_font("Helvetica", "", 7) self.set_text_color(100, 100, 100) self.cell(box_w, 5, self._sanitize(str(label)), align="C") self.set_text_color(0, 0, 0) self.set_y(y + 16) def add_table( self, headers: list, rows: list, col_widths: list = None, ) -> None: """ Styled data table with alternating row shading. Args: headers: Column header strings. rows: List of row tuples/lists. col_widths: Optional column widths in mm. """ self.check_page_break(10 + len(rows) * 6) n = len(headers) if col_widths is None: col_widths = [190 / n] * n # Header self.set_font("Helvetica", "B", 8) self.set_fill_color(*self.PRIMARY) self.set_text_color(*self.WHITE) for i, hdr in enumerate(headers): self.cell(col_widths[i], 7, self._sanitize(hdr), border=1, fill=True, align="C") self.ln() # Rows self.set_font("Helvetica", "", 8) self.set_text_color(0, 0, 0) for row_idx, row in enumerate(rows): self.set_fill_color(250, 250, 250) if row_idx % 2 == 0 else self.set_fill_color(*self.WHITE) for i, cell_val in enumerate(row): self.cell(col_widths[i], 6, self._sanitize(str(cell_val)), border=1, fill=True, align="C") self.ln() self.ln(2) # --------------------------------------------------------------------------- # Main exporter # --------------------------------------------------------------------------- class DashboardPDFExporter: """ Generates a comprehensive PDF report from the Musora Sentiment dashboard. Usage:: exporter = DashboardPDFExporter() pdf_bytes = exporter.generate_report(filtered_df, filter_info) The *filter_info* dict (optional) maps human-readable filter names to their selected values and is shown on the cover page. """ # Kaleido scale factor: 3× ≈ 300 DPI at A4 print size. RENDER_SCALE = 3 def __init__(self): self.sentiment_charts = SentimentCharts() self.distribution_charts = DistributionCharts() self.demographic_charts = DemographicCharts() self.processor = SentimentDataProcessor() self._temp_files: list[str] = [] # ------------------------------------------------------------------ # Public entry point # ------------------------------------------------------------------ def generate_report(self, df, filter_info: dict = None) -> bytes: """ Build and return the full PDF report. Args: df: Filtered dashboard DataFrame. filter_info: Optional dict of active filter descriptions shown on the cover page, e.g. {"Platforms": ["facebook"], "Brands": ["drumeo"]}. Returns: bytes: Raw PDF file contents ready for st.download_button. """ self.pdf = MusoraPDF() try: self._add_cover_page(df, filter_info) self._add_executive_summary(df) self._add_sentiment_section(df) self._add_brand_section(df) self._add_platform_section(df) self._add_intent_section(df) self._add_cross_dimensional_section(df) self._add_volume_section(df) self._add_reply_requirements_section(df) if self._has_demographics(df): self._add_demographics_section(df) if "detected_language" in df.columns: self._add_language_section(df) self._add_data_summary(df, filter_info) return bytes(self.pdf.output()) finally: self._cleanup_temp_files() # ------------------------------------------------------------------ # Chart rendering helpers # ------------------------------------------------------------------ def _prepare_fig_for_pdf(self, fig, is_side_by_side: bool = False) -> None: """Apply white background, readable fonts, and automargin to a Plotly figure.""" base_fs = 13 if is_side_by_side else 14 fig.update_layout( paper_bgcolor="white", plot_bgcolor="white", font=dict(color="black", size=base_fs), title_font_size=base_fs + 4, margin=( dict(l=60, r=40, t=60, b=60) if is_side_by_side else dict(l=80, r=40, t=60, b=80) ), ) fig.update_xaxes(automargin=True) fig.update_yaxes(automargin=True) if fig.layout.showlegend is not False: fig.update_layout(legend_font_size=base_fs - 2) def _fig_to_temp_path( self, fig, width: int = 800, height: int = 400, is_side_by_side: bool = False ) -> str: """Render a Plotly figure to a temporary high-DPI PNG and return the path.""" self._prepare_fig_for_pdf(fig, is_side_by_side=is_side_by_side) img_bytes = pio.to_image( fig, format="png", width=width, height=height, scale=self.RENDER_SCALE, engine="kaleido", ) tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False) tmp.write(img_bytes) tmp.close() self._temp_files.append(tmp.name) return tmp.name def _add_chart(self, fig, width: int = 180, img_width: int = 800, img_height: int = 400) -> None: """Render one figure full-width on the current PDF page.""" try: path = self._fig_to_temp_path(fig, img_width, img_height) h_mm = width * (img_height / img_width) self.pdf.check_page_break(h_mm + 5) self.pdf.image(path, x=10, w=width) self.pdf.ln(3) except Exception: logger.exception("Chart render failed") self.pdf.body_text("[Chart could not be rendered]") def _add_two_charts(self, fig1, fig2, width: int = 92) -> None: """Render two figures side-by-side.""" try: p1 = self._fig_to_temp_path(fig1, 700, 450, is_side_by_side=True) p2 = self._fig_to_temp_path(fig2, 700, 450, is_side_by_side=True) h_mm = width * (450 / 700) self.pdf.check_page_break(h_mm + 5) y = self.pdf.get_y() self.pdf.image(p1, x=10, y=y, w=width) self.pdf.image(p2, x=10 + width + 4, y=y, w=width) self.pdf.set_y(y + h_mm + 3) except Exception: logger.exception("Side-by-side chart render failed") self.pdf.body_text("[Charts could not be rendered]") def _cleanup_temp_files(self) -> None: for path in self._temp_files: try: os.unlink(path) except OSError: pass self._temp_files.clear() # ------------------------------------------------------------------ # Data helpers # ------------------------------------------------------------------ @staticmethod def _has_demographics(df) -> bool: return ( "platform" in df.columns and "musora_app" in df["platform"].values and "age_group" in df.columns and "timezone" in df.columns and "experience_level" in df.columns ) @staticmethod def _filter_summary(filter_info: dict) -> str: if not filter_info: return "No filters applied - showing all data." parts = [] for key, value in filter_info.items(): if value: display = ( value if isinstance(value, str) else ", ".join(str(v) for v in value) ) parts.append(f"{key}: {display}") return "; ".join(parts) if parts else "No filters applied." @staticmethod def _date_range_str(df) -> str: if "comment_timestamp" not in df.columns or df.empty: return "N/A" valid = df["comment_timestamp"].dropna() if valid.empty: return "N/A" return ( f"{valid.min().strftime('%b %d, %Y')} to {valid.max().strftime('%b %d, %Y')}" ) # ------------------------------------------------------------------ # Report sections # ------------------------------------------------------------------ def _add_cover_page(self, df, filter_info: dict) -> None: self.pdf.add_page() self.pdf.ln(40) r, g, b = MusoraPDF.PRIMARY self.pdf.set_fill_color(r, g, b) self.pdf.rect(0, 60, 210, 4, style="F") self.pdf.ln(20) self.pdf.set_font("Helvetica", "B", 28) self.pdf.set_text_color(r, g, b) self.pdf.cell(0, 15, "Musora", align="C", new_x="LMARGIN", new_y="NEXT") self.pdf.set_font("Helvetica", "", 16) self.pdf.set_text_color(80, 80, 80) self.pdf.cell( 0, 10, "Sentiment Analysis Report", align="C", new_x="LMARGIN", new_y="NEXT", ) self.pdf.ln(10) self.pdf.set_draw_color(r, g, b) self.pdf.set_line_width(0.5) self.pdf.line(60, self.pdf.get_y(), 150, self.pdf.get_y()) self.pdf.ln(10) self.pdf.set_font("Helvetica", "", 12) self.pdf.set_text_color(100, 100, 100) self.pdf.cell( 0, 8, f"Generated: {datetime.now().strftime('%B %d, %Y at %H:%M')}", align="C", new_x="LMARGIN", new_y="NEXT", ) self.pdf.ln(5) self.pdf.set_font("Helvetica", "", 10) self.pdf.cell( 0, 7, f"Total Comments Analyzed: {len(df):,}", align="C", new_x="LMARGIN", new_y="NEXT", ) date_str = self._date_range_str(df) if date_str != "N/A": self.pdf.ln(3) self.pdf.set_font("Helvetica", "I", 9) self.pdf.set_text_color(120, 120, 120) self.pdf.cell( 0, 6, MusoraPDF._sanitize(f"Data period: {date_str}"), align="C", new_x="LMARGIN", new_y="NEXT", ) if filter_info: self.pdf.ln(8) self.pdf.set_font("Helvetica", "B", 9) self.pdf.set_text_color(80, 80, 80) self.pdf.cell(0, 6, "Active Filters:", align="C", new_x="LMARGIN", new_y="NEXT") self.pdf.set_font("Helvetica", "", 9) for key, value in filter_info.items(): if value: display = ( value if isinstance(value, str) else ", ".join(str(v) for v in value) ) self.pdf.cell( 0, 5, MusoraPDF._sanitize(f"{key}: {display}"), align="C", new_x="LMARGIN", new_y="NEXT", ) self.pdf.ln(20) self.pdf.set_font("Helvetica", "I", 8) self.pdf.set_text_color(150, 150, 150) self.pdf.cell( 0, 6, "Confidential - For Internal Use Only", align="C", new_x="LMARGIN", new_y="NEXT", ) self.pdf.cell( 0, 6, "Data Source: Snowflake | Musora Sentiment Pipeline", align="C", new_x="LMARGIN", new_y="NEXT", ) def _add_executive_summary(self, df) -> None: self.pdf.add_page() self.pdf.section_header("Executive Summary") self.pdf.section_description(_DESCRIPTIONS["executive_summary"]) metrics = SentimentMetrics.calculate_overall_metrics(df) normalized_score = ((metrics["avg_sentiment_score"] + 2) / 4) * 100 # Health label neg_pct = metrics["negative_pct"] health = "Healthy" if neg_pct < 20 else ("Moderate" if neg_pct < 35 else "Needs Attention") # Opening narrative brands = sorted(df["brand"].dropna().unique().tolist()) if "brand" in df.columns else [] platforms = sorted(df["platform"].dropna().unique().tolist()) if "platform" in df.columns else [] brands_str = ", ".join(str(b).title() for b in brands[:6]) if brands else "all brands" platforms_str = ", ".join(str(p).title() for p in platforms[:6]) if platforms else "all platforms" narrative = ( f"This report analyzes {metrics['total_comments']:,} comments across {brands_str} " f"on {platforms_str}. " f"Overall sentiment is {metrics['positive_pct']:.1f}% positive and " f"{metrics['negative_pct']:.1f}% negative, " f"with {metrics['reply_required_pct']:.1f}% of comments requiring a reply." ) self.pdf.body_text(narrative) # Health status r, g, b = MusoraPDF.PRIMARY self.pdf.set_font("Helvetica", "B", 11) self.pdf.set_text_color(r, g, b) self.pdf.cell( 0, 8, f"Overall Sentiment Health: {health}", new_x="LMARGIN", new_y="NEXT", ) self.pdf.ln(2) self.pdf.set_text_color(0, 0, 0) # Metric tiles — two rows self.pdf.metric_row([ ("Total Comments", f"{metrics['total_comments']:,}"), ("Positive %", f"{metrics['positive_pct']:.1f}%"), ("Negative %", f"{metrics['negative_pct']:.1f}%"), ("Sentiment Score", f"{normalized_score:.0f}/100"), ]) self.pdf.metric_row([ ("Reply Required", f"{metrics['total_reply_required']:,}"), ("Reply Rate %", f"{metrics['reply_required_pct']:.1f}%"), ("Brands Analyzed", str(len(brands))), ("Platforms Analyzed", str(len(platforms))), ]) # Score explanation self.pdf.ln(2) self.pdf.callout_box( "How to read the Sentiment Score:\n" "Each comment is rated Very Positive (+2), Positive (+1), Neutral (0), " "Negative (-1), or Very Negative (-2). " "The Score (0-100) converts the average: 50 = perfectly neutral, " "above 60 = primarily positive, below 40 = primarily negative.", ) # Key findings self.pdf.subsection_header("Key Findings") for finding in self._generate_key_findings(df, metrics): self.pdf.body_text(f" * {finding}") def _generate_key_findings(self, df, metrics: dict) -> list: findings = [] # Sentiment summary if metrics["positive_pct"] > 50: findings.append( f"Sentiment is predominantly positive at {metrics['positive_pct']:.1f}%." ) elif metrics["negative_pct"] > 30: findings.append( f"Negative sentiment is elevated at {metrics['negative_pct']:.1f}% - " f"consider targeted community management." ) else: findings.append( f"Sentiment is balanced: {metrics['positive_pct']:.1f}% positive, " f"{metrics['negative_pct']:.1f}% negative." ) # Top brand by volume if "brand" in df.columns and not df.empty: top_brand = df["brand"].value_counts().index[0] top_count = df["brand"].value_counts().iloc[0] findings.append( f"Most discussed brand: {str(top_brand).title()} " f"({top_count:,} comments, {top_count / len(df) * 100:.1f}% of total)." ) # Reply urgency if metrics["reply_required_pct"] > 10: findings.append( f"{metrics['total_reply_required']:,} comments " f"({metrics['reply_required_pct']:.1f}%) require a reply." ) # Top platform by volume if "platform" in df.columns and not df.empty: top_platform = df["platform"].value_counts().index[0] plat_count = df["platform"].value_counts().iloc[0] findings.append( f"Most active platform: {str(top_platform).title()} " f"({plat_count:,} comments)." ) return findings[:4] def _add_sentiment_section(self, df) -> None: self.pdf.add_page() self.pdf.section_header("Sentiment Distribution") self.pdf.section_description(_DESCRIPTIONS["sentiment"]) metrics = SentimentMetrics.calculate_overall_metrics(df) normalized_score = ((metrics["avg_sentiment_score"] + 2) / 4) * 100 pie = self.sentiment_charts.create_sentiment_pie_chart(df, title="Sentiment Distribution") gauge = self.sentiment_charts.create_sentiment_score_gauge( metrics["avg_sentiment_score"], title="Overall Sentiment Score" ) self._add_two_charts(pie, gauge) self.pdf.body_text( f"Across {metrics['total_comments']:,} analyzed comments: " f"{metrics['positive_pct']:.1f}% positive, " f"{100 - metrics['positive_pct'] - metrics['negative_pct']:.1f}% neutral, " f"{metrics['negative_pct']:.1f}% negative. " f"Sentiment Score: {normalized_score:.0f}/100 " f"(raw average: {metrics['avg_sentiment_score']:.2f} on a -2 to +2 scale)." ) def _add_brand_section(self, df) -> None: if "brand" not in df.columns or df["brand"].nunique() == 0: return self.pdf.add_page() self.pdf.section_header("Sentiment by Brand") self.pdf.section_description(_DESCRIPTIONS["brand"]) bar = self.sentiment_charts.create_sentiment_bar_chart( df, group_by="brand", title="Sentiment Distribution by Brand" ) pct = self.sentiment_charts.create_sentiment_percentage_bar_chart( df, group_by="brand", title="Sentiment by Brand (%)" ) self._add_two_charts(bar, pct) # Summary table brand_metrics = SentimentMetrics.calculate_brand_metrics(df) rows = [] for brand, m in sorted(brand_metrics.items()): score = ((m["avg_sentiment_score"] + 2) / 4) * 100 rows.append(( str(brand).title(), f"{m['total_comments']:,}", f"{m['positive_pct']:.1f}%", f"{m['negative_pct']:.1f}%", f"{m['reply_required_pct']:.1f}%", f"{score:.0f}/100", )) self.pdf.subsection_header("Brand Metrics Summary") self.pdf.add_table( headers=["Brand", "Comments", "Positive %", "Negative %", "Reply Rate", "Score"], rows=rows, col_widths=[38, 32, 30, 30, 30, 30], ) def _add_platform_section(self, df) -> None: if "platform" not in df.columns or df["platform"].nunique() == 0: return self.pdf.add_page() self.pdf.section_header("Sentiment by Platform") self.pdf.section_description(_DESCRIPTIONS["platform"]) bar = self.sentiment_charts.create_sentiment_bar_chart( df, group_by="platform", title="Sentiment Distribution by Platform" ) pct = self.sentiment_charts.create_sentiment_percentage_bar_chart( df, group_by="platform", title="Sentiment by Platform (%)" ) self._add_two_charts(bar, pct) # Summary table platform_metrics = SentimentMetrics.calculate_platform_metrics(df) rows = [] for platform, m in sorted(platform_metrics.items()): score = ((m["avg_sentiment_score"] + 2) / 4) * 100 rows.append(( str(platform).title(), f"{m['total_comments']:,}", f"{m['positive_pct']:.1f}%", f"{m['negative_pct']:.1f}%", f"{m['reply_required_pct']:.1f}%", f"{score:.0f}/100", )) self.pdf.subsection_header("Platform Metrics Summary") self.pdf.add_table( headers=["Platform", "Comments", "Positive %", "Negative %", "Reply Rate", "Score"], rows=rows, col_widths=[38, 32, 30, 30, 30, 30], ) def _add_intent_section(self, df) -> None: if "intent" not in df.columns: return self.pdf.add_page() self.pdf.section_header("Intent Analysis") self.pdf.section_description(_DESCRIPTIONS["intent"]) intent_bar = self.distribution_charts.create_intent_bar_chart( df, title="Intent Distribution", orientation="h" ) intent_pie = self.distribution_charts.create_intent_pie_chart( df, title="Intent Distribution" ) self._add_two_charts(intent_bar, intent_pie) def _add_cross_dimensional_section(self, df) -> None: if "brand" not in df.columns or "platform" not in df.columns: return self.pdf.add_page() self.pdf.section_header("Cross-Dimensional Analysis") self.pdf.section_description(_DESCRIPTIONS["cross_dimensional"]) matrix = self.distribution_charts.create_brand_platform_matrix( df, title="Brand-Platform Comment Matrix" ) heatmap = self.sentiment_charts.create_sentiment_heatmap( df, row_dimension="brand", col_dimension="platform", title="Negative Sentiment Heatmap", ) self._add_two_charts(matrix, heatmap) def _add_volume_section(self, df) -> None: has_platform = "platform" in df.columns has_brand = "brand" in df.columns if not has_platform and not has_brand: return self.pdf.add_page() self.pdf.section_header("Volume Analysis") self.pdf.section_description(_DESCRIPTIONS["volume"]) if has_platform and has_brand: platform_dist = self.distribution_charts.create_platform_distribution( df, title="Comments by Platform" ) brand_dist = self.distribution_charts.create_brand_distribution( df, title="Comments by Brand" ) self._add_two_charts(platform_dist, brand_dist) elif has_platform: self._add_chart( self.distribution_charts.create_platform_distribution(df, title="Comments by Platform") ) else: self._add_chart( self.distribution_charts.create_brand_distribution(df, title="Comments by Brand") ) def _add_reply_requirements_section(self, df) -> None: if "requires_reply" not in df.columns: return self.pdf.add_page() self.pdf.section_header("Reply Requirements Analysis") self.pdf.section_description(_DESCRIPTIONS["reply_requirements"]) urgency = SentimentMetrics.calculate_response_urgency(df) self.pdf.metric_row([ ("Urgent", str(urgency["urgent_count"])), ("High Priority", str(urgency["high_priority_count"])), ("Medium Priority", str(urgency["medium_priority_count"])), ("Low Priority", str(urgency["low_priority_count"])), ]) self.pdf.ln(3) has_brand = "brand" in df.columns has_platform = "platform" in df.columns if has_brand and has_platform: reply_brand = self.distribution_charts.create_reply_required_chart( df, group_by="brand", title="Comments Requiring Reply by Brand" ) reply_platform = self.distribution_charts.create_reply_required_chart( df, group_by="platform", title="Comments Requiring Reply by Platform" ) self._add_two_charts(reply_brand, reply_platform) elif has_brand: self._add_chart( self.distribution_charts.create_reply_required_chart( df, group_by="brand", title="Comments Requiring Reply by Brand" ) ) def _add_demographics_section(self, df) -> None: df_musora = df[df["platform"] == "musora_app"].copy() if df_musora.empty: return self.pdf.add_page() self.pdf.section_header("Demographics Analysis (Musora App)") self.pdf.section_description(_DESCRIPTIONS["demographics"]) self.pdf.body_text(f"Analyzing demographics for {len(df_musora):,} Musora App comments.") # Age age_dist = self.processor.get_demographics_distribution(df_musora, "age_group") if not age_dist.empty: self.pdf.subsection_header("Age Distribution") self._add_chart( self.demographic_charts.create_age_distribution_chart( age_dist, title="Comments by Age Group" ), img_height=350, ) # Region region_dist = self.processor.get_timezone_regions_distribution(df_musora) if not region_dist.empty: self.pdf.subsection_header("Geographic Distribution") self._add_chart( self.demographic_charts.create_region_distribution_chart( region_dist, title="Comments by Region" ), img_height=350, ) # Experience exp_dist = self.processor.get_experience_level_distribution(df_musora, use_groups=True) if not exp_dist.empty: self.pdf.subsection_header("Experience Level Distribution") self._add_chart( self.demographic_charts.create_experience_distribution_chart( exp_dist, title="Comments by Experience Group", use_groups=True ), img_height=350, ) def _add_language_section(self, df) -> None: self.pdf.add_page() self.pdf.section_header("Language Distribution") self.pdf.section_description(_DESCRIPTIONS["language"]) self._add_chart( self.distribution_charts.create_language_distribution(df, top_n=10, title="Top 10 Languages") ) def _add_data_summary(self, df, filter_info: dict) -> None: self.pdf.add_page() self.pdf.section_header("Data Summary") self.pdf.body_text( f"Report generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" ) self.pdf.body_text(f"Total records in report: {len(df):,}") date_str = self._date_range_str(df) if date_str != "N/A": self.pdf.body_text(f"Data range: {date_str}") self.pdf.body_text(f"Active filters: {self._filter_summary(filter_info)}") if "brand" in df.columns: brands = sorted(str(b).title() for b in df["brand"].dropna().unique()) self.pdf.body_text(f"Brands included: {', '.join(brands)}") if "platform" in df.columns: platforms = sorted(str(p).title() for p in df["platform"].dropna().unique()) self.pdf.body_text(f"Platforms included: {', '.join(platforms)}") self.pdf.ln(5) self.pdf.callout_box( "Data source: Snowflake - SOCIAL_MEDIA_DB.ML_FEATURES.COMMENT_SENTIMENT_FEATURES " "and SOCIAL_MEDIA_DB.ML_FEATURES.MUSORA_COMMENT_SENTIMENT_FEATURES.\n" "This report is confidential and intended for internal Musora team use only.", bg_color=(245, 245, 245), )