Predict_Rating / app /services /report_service.py
vtdung23's picture
Upload folder using huggingface_hub
c09e844 verified
"""
Report Service
Generate PDF reports for batch predictions
"""
import io
from typing import List, Dict
from datetime import datetime
from pathlib import Path
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import (
SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
PageBreak, Image, Preformatted
)
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from io import BytesIO
from PIL import Image as PILImage
from app.config import WORDCLOUD_DIR
class ReportService:
    """Service for generating PDF reports.

    Builds an in-memory PDF with ReportLab that summarises a batch of rating
    predictions: a title and info header, a summary table, the rating
    distribution, an optional word cloud image and a detailed per-comment
    results table.

    Attributes set by ``__init__``:
        styles: ReportLab sample stylesheet extended with the ``CustomTitle``,
            ``CustomHeading`` and ``CustomNormal`` paragraph styles.
        font_name / font_name_bold: Font names used by every style and table.
            ``DejaVu`` / ``DejaVuBold`` when the TrueType files could be
            registered, otherwise ``Helvetica`` / ``Helvetica-Bold``.
        star_char: Character repeated to render a rating.
    """

    # Locations of the DejaVu TrueType files used for Vietnamese text.
    _DEJAVU_REGULAR_PATH = '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'
    _DEJAVU_BOLD_PATH = '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf'
    # Accent colour shared by the headings and the table header rows.
    _ACCENT_HEX = '#4F46E5'

    def __init__(self):
        self.styles = getSampleStyleSheet()
        # Fonts must be resolved first: the custom styles are created with
        # whichever font names were actually registered.
        self._setup_fonts()
        self._setup_custom_styles()

    def _setup_fonts(self):
        """Register the DejaVu fonts, falling back to the built-in Helvetica.

        On success ``font_name`` / ``font_name_bold`` point at the DejaVu
        faces and ``star_char`` is U+2605 (BLACK STAR). If registration fails
        for any reason a warning is printed and the Helvetica defaults (with
        an ASCII ``*`` as the rating character) stay in place, so report
        generation keeps working without Vietnamese glyph support.
        """
        # Safe defaults that need no font files on disk.
        self.font_name = 'Helvetica'
        self.font_name_bold = 'Helvetica-Bold'
        # The encoding used for the built-in fonts has no U+2605, so the
        # fallback rating marker is a plain asterisk.
        self.star_char = '*'
        try:
            pdfmetrics.registerFont(TTFont('DejaVu', self._DEJAVU_REGULAR_PATH))
            pdfmetrics.registerFont(TTFont('DejaVuBold', self._DEJAVU_BOLD_PATH))
            # Map the family so <b> markup inside paragraphs selects the bold
            # face. No oblique face is registered, so italic markup reuses
            # the upright faces.
            pdfmetrics.registerFontFamily(
                'DejaVu',
                normal='DejaVu',
                bold='DejaVuBold',
                italic='DejaVu',
                boldItalic='DejaVuBold',
            )
        except Exception as e:
            # Best effort: a missing or unreadable font must not prevent the
            # service from being constructed.
            print(f"Warning: Could not load Vietnamese fonts: {e}")
            return
        self.font_name = 'DejaVu'
        self.font_name_bold = 'DejaVuBold'
        self.star_char = '\u2605'

    def _setup_custom_styles(self):
        """Add the custom paragraph styles to ``self.styles``.

        Must run after ``_setup_fonts`` because the styles use
        ``self.font_name`` and ``self.font_name_bold``.
        """
        accent = colors.HexColor(self._ACCENT_HEX)
        self.styles.add(ParagraphStyle(
            name='CustomTitle',
            parent=self.styles['Heading1'],
            fontSize=24,
            textColor=accent,
            spaceAfter=30,
            alignment=TA_CENTER,
            fontName=self.font_name_bold,
        ))
        self.styles.add(ParagraphStyle(
            name='CustomHeading',
            parent=self.styles['Heading2'],
            fontSize=14,
            textColor=accent,
            spaceAfter=12,
            fontName=self.font_name_bold,
        ))
        self.styles.add(ParagraphStyle(
            name='CustomNormal',
            parent=self.styles['Normal'],
            fontSize=10,
            spaceAfter=6,
            fontName=self.font_name,
        ))

    def generate_rating_distribution_chart(self) -> tuple:
        """Generate a matplotlib chart for the rating distribution.

        NOTE: this is an unimplemented placeholder. Despite the annotation
        and the intended ``(buffer, filename)`` result it currently returns
        ``None``.
        """
        pass

    @staticmethod
    def _escape_markup(text) -> str:
        """Return ``text`` as a string that is safe inside Paragraph markup.

        ``Paragraph`` parses its input as XML-like markup, so ``&``, ``<``
        and ``>`` coming from user data are escaped. ``None`` becomes ``''``.
        """
        from xml.sax.saxutils import escape
        return escape('' if text is None else str(text))

    @staticmethod
    def _normalize_distribution(distribution) -> Dict[int, int]:
        """Return ``distribution`` with integer keys and integer counts.

        Keys and counts may arrive as strings (e.g. after a JSON round trip).
        Entries whose key or count cannot be converted are dropped, and a
        missing distribution yields an empty dict.
        """
        normalized = {}
        for key, value in (distribution or {}).items():
            try:
                normalized[int(key)] = int(value)
            except (ValueError, TypeError):
                continue
        return normalized

    @staticmethod
    def _coerce_rating(value) -> int:
        """Return ``value`` as a non-negative int, or 0 if it is not numeric."""
        try:
            return max(0, int(value))
        except (TypeError, ValueError, OverflowError):
            return 0

    @staticmethod
    def _coerce_confidence(value) -> float:
        """Return ``value`` as a float, or 0.0 if it is missing or not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def _basic_table_style(self, header_font_size, body_background):
        """Return the TableStyle shared by the summary and distribution tables."""
        return TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor(self._ACCENT_HEX)),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), self.font_name_bold),
            ('FONTSIZE', (0, 0), (-1, 0), header_font_size),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), body_background),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('FONTNAME', (0, 1), (-1, -1), self.font_name),
            ('FONTSIZE', (0, 1), (-1, -1), 10),
        ])

    def _build_summary_section(self, predictions: List[Dict]) -> list:
        """Return the flowables for the "Summary" heading and table."""
        confidences = [
            self._coerce_confidence(p.get('confidence', 0)) for p in predictions
        ]
        avg_confidence = sum(confidences) / len(confidences) if confidences else 0
        summary_data = [
            ['Metric', 'Value'],
            ['Total Predictions', str(len(predictions))],
            ['Average Confidence', f'{avg_confidence:.2%}'],
        ]
        summary_table = Table(summary_data, colWidths=[3*inch, 2*inch])
        summary_table.setStyle(self._basic_table_style(12, colors.beige))
        return [
            Paragraph("Summary", self.styles['CustomHeading']),
            summary_table,
            Spacer(1, 0.3*inch),
        ]

    def _build_distribution_section(self, distribution) -> list:
        """Return the flowables for the "Rating Distribution" heading and table."""
        counts = self._normalize_distribution(distribution)
        total = sum(counts.values())
        dist_data = [['Rating', 'Count', 'Percentage']]
        for rating in range(1, 6):
            count = counts.get(rating, 0)
            percentage = (count / total * 100) if total > 0 else 0
            dist_data.append([
                self.star_char * rating,
                str(count),
                f"{percentage:.1f}%",
            ])
        dist_table = Table(dist_data, colWidths=[1.5*inch, 1.5*inch, 1.5*inch])
        dist_table.setStyle(self._basic_table_style(11, colors.lightgrey))
        return [
            Paragraph("Rating Distribution", self.styles['CustomHeading']),
            dist_table,
            Spacer(1, 0.3*inch),
        ]

    def _build_wordcloud_section(self, wordcloud_path) -> list:
        """Return the flowables for the word cloud, or ``[]`` if unavailable.

        A path starting with ``/`` is treated as a URL path: only its last
        segment is kept and looked up inside ``WORDCLOUD_DIR``. Any other
        value is used as a file path as given. When the image is missing or
        cannot be prepared a warning is printed and the whole section is
        skipped, so the report never shows an empty heading.
        """
        if not wordcloud_path:
            return []
        try:
            file_path = wordcloud_path
            if wordcloud_path.startswith('/'):
                file_path = str(Path(WORDCLOUD_DIR) / wordcloud_path.split('/')[-1])
            if not Path(file_path).exists():
                print(f"Warning: Could not include word cloud: file not found: {file_path}")
                return []
            img = Image(file_path, width=5*inch, height=2.5*inch)
        except Exception as e:
            # Best effort: the word cloud is optional decoration.
            print(f"Warning: Could not include word cloud: {e}")
            return []
        return [
            Paragraph("Word Cloud Analysis", self.styles['CustomHeading']),
            img,
            Spacer(1, 0.2*inch),
            Paragraph(
                "<i>Larger words indicate higher frequency in the comments</i>",
                self.styles['CustomNormal']
            ),
            Spacer(1, 0.3*inch),
        ]

    def _build_results_section(self, predictions: List[Dict]) -> list:
        """Return the flowables for the "Detailed Results" heading and table."""
        results_data = [['Comment', 'Rating', 'Confidence']]
        for pred in predictions:
            rating = self._coerce_rating(pred.get('rating', 0))
            confidence = self._coerce_confidence(pred.get('confidence', 0))
            # Wrapping the comment in a Paragraph lets ReportLab wrap long
            # text inside the cell; the text is escaped because it is user
            # input and Paragraph would otherwise parse it as markup.
            comment_paragraph = Paragraph(
                self._escape_markup(pred.get('text', '')),
                self.styles['CustomNormal']
            )
            results_data.append([
                comment_paragraph,
                self.star_char * rating,
                f"{confidence:.2%}",
            ])
        # Wide first column so comments have room to wrap.
        results_table = Table(results_data, colWidths=[3.5*inch, 0.8*inch, 1.2*inch])
        results_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor(self._ACCENT_HEX)),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
            ('ALIGN', (1, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), self.font_name_bold),
            ('FONTSIZE', (0, 0), (-1, 0), 10),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.lightgrey),
            ('GRID', (0, 0), (-1, -1), 1, colors.grey),
            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.lightgrey]),
            ('FONTNAME', (0, 1), (-1, -1), self.font_name),
            ('FONTSIZE', (0, 1), (-1, -1), 9),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),  # Top alignment for wrapped text
            ('LEFTPADDING', (0, 0), (-1, -1), 8),
            ('RIGHTPADDING', (0, 0), (-1, -1), 8),
            ('TOPPADDING', (0, 0), (-1, -1), 6),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ]))
        return [
            Paragraph("Detailed Results", self.styles['CustomHeading']),
            Spacer(1, 0.2*inch),
            results_table,
        ]

    def generate_pdf_report(
        self,
        predictions: List[Dict],
        distribution: Dict[int, int],
        wordcloud_path: str,
        username: str,
        filename: str = None
    ) -> bytes:
        """Generate a comprehensive PDF report for batch predictions.

        Args:
            predictions: Prediction results; each dict is read for the keys
                'text', 'rating' and 'confidence'. Missing or non-numeric
                ratings and confidences are treated as 0.
            distribution: Rating distribution ``{rating: count}``; keys and
                counts may be ints or numeric strings.
            wordcloud_path: URL path or file path of the word cloud image.
                The section is skipped when this is empty or the file is
                not found.
            username: Name shown in the "Generated by" line.
            filename: Accepted for backward compatibility; not used here.

        Returns:
            The PDF file content as bytes.
        """
        predictions = list(predictions or [])

        # The document is rendered entirely in memory.
        pdf_buffer = io.BytesIO()
        doc = SimpleDocTemplate(
            pdf_buffer,
            pagesize=A4,
            rightMargin=0.75*inch,
            leftMargin=0.75*inch,
            topMargin=0.75*inch,
            bottomMargin=0.75*inch
        )

        story = [
            Paragraph("Prediction Report", self.styles['CustomTitle']),
            Spacer(1, 0.3*inch),
        ]

        # Report info. The username is escaped because it is interpolated
        # into Paragraph markup.
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        info_text = (
            f"<b>Generated by:</b> {self._escape_markup(username)}"
            f"<br/><b>Date:</b> {timestamp}"
        )
        story.append(Paragraph(info_text, self.styles['CustomNormal']))
        story.append(Spacer(1, 0.3*inch))

        story.extend(self._build_summary_section(predictions))
        story.extend(self._build_distribution_section(distribution))
        story.extend(self._build_wordcloud_section(wordcloud_path))

        # The detailed results always start on a new page.
        story.append(PageBreak())
        story.extend(self._build_results_section(predictions))

        doc.build(story)
        return pdf_buffer.getvalue()
def get_report_service() -> ReportService:
    """Provide a ReportService for dependency injection.

    Returns:
        A newly constructed ReportService; a fresh instance is built on
        every call.
    """
    service = ReportService()
    return service