| import pandas as pd
|
| from reportlab.lib.pagesizes import letter, A4
|
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| from reportlab.lib.units import inch
|
| from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
| from reportlab.lib import colors
|
| from reportlab.lib.enums import TA_CENTER, TA_LEFT
|
| from datetime import datetime
|
| from typing import List, Dict
|
| import os
|
|
|
def generate_csv(data, path="report.csv"):
    """Write ``data`` to a CSV file through the enhanced generator.

    Legacy entry point kept for backwards compatibility. It forwards to
    ``generate_enhanced_csv`` with the interviewee type fixed to ``"Other"``
    and returns whatever that function returns.
    """
    legacy_interviewee_type = "Other"
    csv_path = generate_enhanced_csv(data, legacy_interviewee_type, path)
    return csv_path
|
|
|
|
|
| def generate_enhanced_csv(
|
| data: List[Dict],
|
| interviewee_type: str,
|
| path: str = "report.csv"
|
| ) -> str:
|
| """
|
| Generate enhanced CSV with proper formatting and data validation
|
| """
|
|
|
| if not data:
|
|
|
| df = pd.DataFrame(columns=["Transcript ID", "Status"])
|
| df.to_csv(path, index=False)
|
| return path
|
|
|
|
|
| df = pd.DataFrame(data)
|
|
|
|
|
| priority_cols = ["Transcript ID", "File Name", "Quality Score", "Word Count"]
|
| other_cols = [col for col in df.columns if col not in priority_cols]
|
| ordered_cols = [col for col in priority_cols if col in df.columns] + other_cols
|
|
|
| df = df[ordered_cols]
|
|
|
|
|
| df.to_csv(path, index=False, encoding='utf-8-sig')
|
|
|
| return path
|
|
|
|
|
def generate_pdf(summary, details, path="report.pdf"):
    """Render a single-transcript PDF report through the enhanced generator.

    Legacy entry point kept for backwards compatibility. ``details`` is
    wrapped into one synthetic result record (fixed id, file name and quality
    score; word count taken from whitespace-splitting ``details``) and passed
    to ``generate_enhanced_pdf`` with interviewee type ``"Other"`` and an
    empty list of processing errors. Returns whatever that function returns.
    """
    word_total = len(details.split())
    legacy_result = {
        "transcript_id": "Transcript 1",
        "file_name": "analysis.txt",
        "full_text": details,
        "quality_score": 0.8,
        "word_count": word_total,
    }
    no_errors = []
    return generate_enhanced_pdf(summary, [legacy_result], "Other", no_errors, path)
|
|
|
|
|
def generate_enhanced_pdf(
    summary: str,
    results: List[Dict],
    interviewee_type: str,
    processing_errors: List[str],
    path: str = "report.pdf"
) -> str:
    """Build the transcript analysis PDF report at ``path`` and return ``path``.

    Report layout, in order:
      1. Title and a metadata table (generation time, interviewee type,
         number of transcripts, number with ``quality_score`` > 0).
      2. "Executive Summary": ``summary`` split on blank lines, one
         paragraph per non-empty piece.
      3. "Processing Issues" on its own page, only when
         ``processing_errors`` is non-empty; one bullet per error.
      4. "Detailed Transcript Analysis" on a new page: per result, a
         subheading, a small stats table, and at most the first 10
         blank-line-separated chunks of ``full_text`` (each cut to 1000
         characters plus "..."), with a page break between transcripts.

    All free text handed to ``Paragraph`` is escaped first, because
    ``Paragraph`` interprets its text as markup and raw ``&``, ``<`` or
    ``>`` would otherwise be read as tags/entities.

    Args:
        summary: Executive summary text.
        results: Result records; each must provide ``transcript_id``,
            ``file_name``, ``quality_score``, ``word_count`` and
            ``full_text``.
        interviewee_type: Label shown in the metadata table.
        processing_errors: Error messages to list; may be empty.
        path: Destination PDF path.

    Returns:
        ``path``. If building the full report raises, the error is printed
        and a minimal fallback document (title, error message, summary) is
        written to the same path instead; an exception raised while building
        the fallback propagates to the caller.
    """
    doc = SimpleDocTemplate(
        path,
        pagesize=letter,
        rightMargin=0.75*inch,
        leftMargin=0.75*inch,
        topMargin=0.75*inch,
        bottomMargin=0.75*inch
    )

    styles = getSampleStyleSheet()

    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        textColor=colors.HexColor('#1a1a1a'),
        spaceAfter=30,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold'
    )

    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading2'],
        fontSize=16,
        textColor=colors.HexColor('#2c3e50'),
        spaceAfter=12,
        spaceBefore=20,
        fontName='Helvetica-Bold'
    )

    subheading_style = ParagraphStyle(
        'CustomSubheading',
        parent=styles['Heading3'],
        fontSize=13,
        textColor=colors.HexColor('#34495e'),
        spaceAfter=8,
        spaceBefore=12,
        fontName='Helvetica-Bold'
    )

    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['BodyText'],
        fontSize=11,
        leading=14,
        textColor=colors.HexColor('#2c3e50'),
        alignment=TA_LEFT
    )

    story = []

    # --- Title and metadata -------------------------------------------------
    story.append(Paragraph("Transcript Analysis Report", title_style))
    story.append(Spacer(1, 0.2*inch))

    processed_count = sum(1 for r in results if r.get("quality_score", 0) > 0)
    metadata = [
        ["Report Generated:", datetime.now().strftime("%B %d, %Y at %I:%M %p")],
        ["Interviewee Type:", interviewee_type],
        ["Total Transcripts:", str(len(results))],
        ["Successfully Processed:", str(processed_count)]
    ]

    metadata_table = Table(metadata, colWidths=[2*inch, 4*inch])
    metadata_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (0, -1), colors.HexColor('#ecf0f1')),
        ('TEXTCOLOR', (0, 0), (-1, -1), colors.HexColor('#2c3e50')),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
        ('TOPPADDING', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#bdc3c7'))
    ]))

    story.append(metadata_table)
    story.append(Spacer(1, 0.3*inch))

    # --- Executive summary --------------------------------------------------
    story.append(Paragraph("Executive Summary", heading_style))
    story.append(Spacer(1, 0.1*inch))

    for para in summary.split('\n\n'):
        stripped_para = para.strip()
        if stripped_para:
            story.append(Paragraph(_escape_pdf_markup(stripped_para), body_style))
            story.append(Spacer(1, 0.1*inch))

    # --- Processing issues (optional) ---------------------------------------
    if processing_errors:
        story.append(PageBreak())
        story.append(Paragraph("Processing Issues", heading_style))
        story.append(Spacer(1, 0.1*inch))

        for error in processing_errors:
            story.append(Paragraph(f"• {_escape_pdf_markup(error)}", body_style))
            story.append(Spacer(1, 0.05*inch))

    # --- Per-transcript details ---------------------------------------------
    story.append(PageBreak())
    story.append(Paragraph("Detailed Transcript Analysis", heading_style))
    story.append(Spacer(1, 0.2*inch))

    last_index = len(results) - 1
    for index, result in enumerate(results):
        # Ids and file names are free text too, so they are escaped like the
        # body paragraphs.
        transcript_title = f"{result['transcript_id']} - {result['file_name']}"
        story.append(Paragraph(_escape_pdf_markup(transcript_title), subheading_style))

        stats_data = [
            ["Quality Score:", f"{result['quality_score']:.2f}/1.00"],
            ["Word Count:", f"{result['word_count']:,}"]
        ]

        stats_table = Table(stats_data, colWidths=[1.5*inch, 2*inch])
        stats_table.setStyle(TableStyle([
            ('FONTSIZE', (0, 0), (-1, -1), 9),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 4),
            ('TOPPADDING', (0, 0), (-1, -1), 4),
        ]))

        story.append(stats_table)
        story.append(Spacer(1, 0.1*inch))

        text = result['full_text']

        # Only the first 10 blank-line-separated chunks are rendered.
        for chunk in text.split('\n\n')[:10]:
            stripped_chunk = chunk.strip()
            if not stripped_chunk:
                continue
            # Truncate BEFORE escaping so the cut can never land in the
            # middle of an escape sequence and produce invalid markup.
            if len(stripped_chunk) > 1000:
                stripped_chunk = stripped_chunk[:1000] + "..."
            story.append(Paragraph(_escape_pdf_markup(stripped_chunk), body_style))
            story.append(Spacer(1, 0.1*inch))

        story.append(Spacer(1, 0.2*inch))

        # Decide by position, not by dict equality: two records with equal
        # contents must still be separated by a page break.
        if index < last_index:
            story.append(PageBreak())

    try:
        doc.build(story)
        return path
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and still leave
        # a readable document at ``path``.
        print(f"[PDF Error] Failed to generate PDF: {e}")

        simple_doc = SimpleDocTemplate(path, pagesize=letter)
        simple_story = [
            Paragraph("Transcript Analysis Report", title_style),
            # Escaped so the fallback cannot fail on the same characters that
            # may have broken the full report.
            Paragraph(
                f"Error generating full report: {_escape_pdf_markup(str(e))}",
                body_style
            ),
            Paragraph(_escape_pdf_markup(summary), body_style)
        ]
        simple_doc.build(simple_story)
        return path


def _escape_pdf_markup(text: str) -> str:
    """Return ``text`` with ``&``, ``<`` and ``>`` escaped for use in a Paragraph."""
    # Function-scope import keeps this block self-contained without touching
    # the file-level import list.
    from xml.sax.saxutils import escape

    return escape(text)