"""
Report Service
Generate PDF reports for batch predictions
"""

import io
import logging
from datetime import datetime
from io import BytesIO
from pathlib import Path
from typing import Dict, List, Optional
from xml.sax.saxutils import escape

import matplotlib
matplotlib.use('Agg')  # non-interactive backend; must be selected before pyplot is imported
import matplotlib.pyplot as plt
from PIL import Image as PILImage
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
    PageBreak, Image, Preformatted
)

from app.config import WORDCLOUD_DIR

logger = logging.getLogger(__name__)


class ReportService:
    """Service for generating PDF reports"""

    # Accent colour used for headings and table header rows.
    PRIMARY_COLOR = '#4F46E5'
    # Ratings are rendered as 1..MAX_RATING stars.
    MAX_RATING = 5
    # Built-in PDF fonts used when the DejaVu TTF files cannot be loaded.
    FALLBACK_FONT = 'Helvetica'
    FALLBACK_FONT_BOLD = 'Helvetica-Bold'
    # Location of the DejaVu fonts (Vietnamese character support).
    DEJAVU_PATH = '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'
    DEJAVU_BOLD_PATH = '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf'

    def __init__(self):
        self.styles = getSampleStyleSheet()
        # Fonts must be resolved before the styles are created, because the
        # styles reference the font names chosen here.
        self._setup_fonts()
        self._setup_custom_styles()

    def _setup_fonts(self):
        """
        Setup fonts for Vietnamese character support

        Tries to register the DejaVu regular and bold fonts. On success
        ``self.font_name`` / ``self.font_name_bold`` are set to the DejaVu
        names; on any failure they stay on the Helvetica fallbacks so that
        styles never reference an unregistered font.
        """
        self.font_name = self.FALLBACK_FONT
        self.font_name_bold = self.FALLBACK_FONT_BOLD
        try:
            pdfmetrics.registerFont(TTFont('DejaVu', self.DEJAVU_PATH))
            pdfmetrics.registerFont(TTFont('DejaVuBold', self.DEJAVU_BOLD_PATH))
        except Exception as e:
            # Deliberate best-effort: a missing or unreadable font file must
            # not prevent report generation.
            logger.warning("Could not load Vietnamese fonts: %s", e)
            return
        self.font_name = 'DejaVu'
        self.font_name_bold = 'DejaVuBold'

    def _setup_custom_styles(self):
        """Setup custom paragraph styles using the fonts chosen by _setup_fonts"""
        accent = colors.HexColor(self.PRIMARY_COLOR)

        self.styles.add(ParagraphStyle(
            name='CustomTitle',
            parent=self.styles['Heading1'],
            fontSize=24,
            textColor=accent,
            spaceAfter=30,
            alignment=TA_CENTER,
            fontName=self.font_name_bold
        ))

        self.styles.add(ParagraphStyle(
            name='CustomHeading',
            parent=self.styles['Heading2'],
            fontSize=14,
            textColor=accent,
            spaceAfter=12,
            fontName=self.font_name_bold
        ))

        self.styles.add(ParagraphStyle(
            name='CustomNormal',
            parent=self.styles['Normal'],
            fontSize=10,
            spaceAfter=6,
            fontName=self.font_name
        ))

    def generate_rating_distribution_chart(self) -> Optional[tuple]:
        """
        Generate a matplotlib chart for rating distribution

        NOTE: not implemented yet; this placeholder always returns None.

        Returns:
            tuple: (buffer, filename) once implemented; currently None
        """
        return None

    @staticmethod
    def _as_float(value) -> float:
        """Coerce a confidence value to float, treating invalid input as 0.0"""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    @classmethod
    def _stars(cls, rating) -> str:
        """Render a rating as star characters, clamped to 0..MAX_RATING"""
        try:
            count = int(rating)
        except (TypeError, ValueError, OverflowError):
            # Ratings may arrive as None, strings or NaN/inf from JSON.
            count = 0
        return "★" * max(0, min(count, cls.MAX_RATING))

    def _basic_table_style(self, header_font_size, body_background) -> TableStyle:
        """Build the centred, gridded style shared by the summary tables"""
        return TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor(self.PRIMARY_COLOR)),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), self.font_name_bold),
            ('FONTSIZE', (0, 0), (-1, 0), header_font_size),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), body_background),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('FONTNAME', (0, 1), (-1, -1), self.font_name),
            ('FONTSIZE', (0, 1), (-1, -1), 10)
        ])

    def _summary_section(self, predictions: List[Dict]) -> list:
        """Build the "Summary" heading and table (count and mean confidence)"""
        total_predictions = len(predictions)
        if total_predictions:
            confidence_sum = sum(
                self._as_float(p.get('confidence', 0)) for p in predictions
            )
            avg_confidence = confidence_sum / total_predictions
        else:
            avg_confidence = 0

        summary_data = [
            ['Metric', 'Value'],
            ['Total Predictions', str(total_predictions)],
            ['Average Confidence', f'{avg_confidence:.2%}'],
        ]
        summary_table = Table(summary_data, colWidths=[3*inch, 2*inch])
        summary_table.setStyle(self._basic_table_style(12, colors.beige))

        return [
            Paragraph("Summary", self.styles['CustomHeading']),
            summary_table,
            Spacer(1, 0.3*inch),
        ]

    def _distribution_section(self, distribution: Optional[Dict]) -> list:
        """Build the "Rating Distribution" heading and table"""
        # Normalize distribution keys to integers (they might come as strings
        # from JSON); entries that cannot be converted are ignored.
        normalized_dist = {}
        for key, value in (distribution or {}).items():
            try:
                normalized_dist[int(key)] = int(value)
            except (ValueError, TypeError):
                continue

        total = sum(normalized_dist.values())

        dist_data = [['Rating', 'Count', 'Percentage']]
        for rating in range(1, self.MAX_RATING + 1):
            count = normalized_dist.get(rating, 0)
            percentage = (count / total * 100) if total > 0 else 0
            dist_data.append([
                self._stars(rating),
                str(count),
                f"{percentage:.1f}%"
            ])

        dist_table = Table(dist_data, colWidths=[1.5*inch, 1.5*inch, 1.5*inch])
        dist_table.setStyle(self._basic_table_style(11, colors.lightgrey))

        return [
            Paragraph("Rating Distribution", self.styles['CustomHeading']),
            dist_table,
            Spacer(1, 0.3*inch),
        ]

    def _wordcloud_section(self, wordcloud_path: str) -> list:
        """Build the "Word Cloud Analysis" section; the image is best-effort"""
        flowables = [
            Paragraph("Word Cloud Analysis", self.styles['CustomHeading'])
        ]

        try:
            file_path = wordcloud_path
            if wordcloud_path.startswith('/'):
                # A leading slash is treated as a URL path: only its last
                # segment is kept and resolved inside WORDCLOUD_DIR.
                file_path = str(WORDCLOUD_DIR / wordcloud_path.split('/')[-1])

            if Path(file_path).exists():
                flowables.append(Image(file_path, width=5*inch, height=2.5*inch))
                flowables.append(Spacer(1, 0.2*inch))
                flowables.append(Paragraph(
                    "Larger words indicate higher frequency in the comments",
                    self.styles['CustomNormal']
                ))
            else:
                logger.warning("Word cloud image not found: %s", file_path)
        except Exception as e:
            # Deliberate best-effort: the report is still useful without
            # the word cloud image.
            logger.warning("Could not include word cloud: %s", e)

        flowables.append(Spacer(1, 0.3*inch))
        return flowables

    def _results_section(self, predictions: List[Dict]) -> list:
        """Build the "Detailed Results" heading and per-prediction table"""
        results_data = [['Comment', 'Rating', 'Confidence']]
        for pred in predictions:
            # Paragraph parses XML-like markup, so user text must be escaped
            # or characters such as "<" and "&" break the document build.
            comment = escape(str(pred.get('text') or ''))
            confidence = self._as_float(pred.get('confidence', 0))

            results_data.append([
                # A Paragraph cell lets ReportLab wrap long comments.
                Paragraph(comment, self.styles['CustomNormal']),
                self._stars(pred.get('rating', 0)),
                f"{confidence:.2%}"
            ])

        # Wider comment column to leave room for wrapped text.
        results_table = Table(
            results_data,
            colWidths=[3.5*inch, 0.8*inch, 1.2*inch]
        )
        results_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor(self.PRIMARY_COLOR)),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
            ('ALIGN', (1, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), self.font_name_bold),
            ('FONTSIZE', (0, 0), (-1, 0), 10),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.lightgrey),
            ('GRID', (0, 0), (-1, -1), 1, colors.grey),
            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.lightgrey]),
            ('FONTNAME', (0, 1), (-1, -1), self.font_name),
            ('FONTSIZE', (0, 1), (-1, -1), 9),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),  # Top alignment for wrapped text
            ('LEFTPADDING', (0, 0), (-1, -1), 8),
            ('RIGHTPADDING', (0, 0), (-1, -1), 8),
            ('TOPPADDING', (0, 0), (-1, -1), 6),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ]))

        return [
            Paragraph("Detailed Results", self.styles['CustomHeading']),
            Spacer(1, 0.2*inch),
            results_table,
        ]

    def generate_pdf_report(
        self,
        predictions: List[Dict],
        distribution: Dict[int, int],
        wordcloud_path: str,
        username: str,
        filename: Optional[str] = None
    ) -> bytes:
        """
        Generate comprehensive PDF report for batch predictions

        Args:
            predictions: List of prediction results with 'text', 'rating', 'confidence'
            distribution: Rating distribution dict {rating: count}
            wordcloud_path: Path to generated wordcloud image (URL or file path)
            username: Username for the report
            filename: Optional custom filename (currently unused by this method)

        Returns:
            bytes: PDF file content
        """
        predictions = list(predictions or [])

        # The PDF is assembled entirely in memory.
        pdf_buffer = io.BytesIO()
        doc = SimpleDocTemplate(
            pdf_buffer,
            pagesize=A4,
            rightMargin=0.75*inch,
            leftMargin=0.75*inch,
            topMargin=0.75*inch,
            bottomMargin=0.75*inch
        )

        # Title
        story = [
            Paragraph("Prediction Report", self.styles['CustomTitle']),
            Spacer(1, 0.3*inch),
        ]

        # Report info. Paragraph collapses plain whitespace, so the line
        # break is expressed as <br/> markup; the username is escaped because
        # it is caller-supplied text.
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        info_text = (
            f"Generated by: {escape(str(username))}<br/>"
            f"Date: {timestamp}"
        )
        story.append(Paragraph(info_text, self.styles['CustomNormal']))
        story.append(Spacer(1, 0.3*inch))

        story.extend(self._summary_section(predictions))
        story.extend(self._distribution_section(distribution))
        if wordcloud_path:
            story.extend(self._wordcloud_section(wordcloud_path))

        # Detailed results start on their own page.
        story.append(PageBreak())
        story.extend(self._results_section(predictions))

        doc.build(story)
        return pdf_buffer.getvalue()


def get_report_service() -> ReportService:
    """Dependency injection for report service"""
    return ReportService()