"""Report generation helpers: CSV export and PDF rendering of transcript analyses."""

import os
from datetime import datetime
from typing import Dict, List
from xml.sax.saxutils import escape

import pandas as pd
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT
from reportlab.lib.pagesizes import A4, letter
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.platypus import (
    PageBreak,
    Paragraph,
    Spacer,
    SimpleDocTemplate,
    Table,
    TableStyle,
)

# Only the first N blank-line-separated chunks of each transcript are rendered,
# and each rendered chunk is cut to this many characters, to keep PDFs short.
_MAX_CHUNKS_PER_TRANSCRIPT = 10
_MAX_CHUNK_CHARS = 1000

# Encoding used for every CSV this module writes (BOM-prefixed UTF-8).
_CSV_ENCODING = "utf-8-sig"


def generate_csv(data, path="report.csv"):
    """Legacy function - kept for backwards compatibility.

    Delegates to :func:`generate_enhanced_csv` with interviewee type "Other".
    """
    return generate_enhanced_csv(data, "Other", path)


def generate_enhanced_csv(
    data: List[Dict],
    interviewee_type: str,
    path: str = "report.csv",
) -> str:
    """Write ``data`` to a CSV file and return the file path.

    Args:
        data: One dict per row. When empty (or otherwise falsy), a header-only
            CSV with the columns "Transcript ID" and "Status" is written.
        interviewee_type: Accepted for interface compatibility; not used when
            writing the CSV.
        path: Destination file path.

    Returns:
        ``path``, unchanged.
    """
    if not data:
        frame = pd.DataFrame(columns=["Transcript ID", "Status"])
    else:
        frame = pd.DataFrame(data)

        # Put the most useful columns first; everything else keeps its order.
        priority_cols = ["Transcript ID", "File Name", "Quality Score", "Word Count"]
        leading = [col for col in priority_cols if col in frame.columns]
        trailing = [col for col in frame.columns if col not in priority_cols]
        frame = frame[leading + trailing]

    # Same encoding on both paths so the empty and non-empty files agree.
    frame.to_csv(path, index=False, encoding=_CSV_ENCODING)
    return path


def generate_pdf(summary, details, path="report.pdf"):
    """Legacy function - kept for backwards compatibility.

    Wraps ``details`` in a single-transcript results list (fixed id, file name
    and quality score) and delegates to :func:`generate_enhanced_pdf`.
    """
    details = details or ""  # tolerate None instead of failing on .split()

    # Create minimal results structure
    results = [{
        "transcript_id": "Transcript 1",
        "file_name": "analysis.txt",
        "full_text": details,
        "quality_score": 0.8,
        "word_count": len(details.split()),
    }]
    return generate_enhanced_pdf(summary, results, "Other", [], path)


def _escape_markup(text) -> str:
    """Escape ``&``, ``<`` and ``>`` so Paragraph treats the text as literal."""
    return escape(str(text))


def _build_styles() -> Dict[str, ParagraphStyle]:
    """Return the paragraph styles used by the report, keyed by role."""
    base = getSampleStyleSheet()
    return {
        "title": ParagraphStyle(
            'CustomTitle',
            parent=base['Heading1'],
            fontSize=24,
            textColor=colors.HexColor('#1a1a1a'),
            spaceAfter=30,
            alignment=TA_CENTER,
            fontName='Helvetica-Bold',
        ),
        "heading": ParagraphStyle(
            'CustomHeading',
            parent=base['Heading2'],
            fontSize=16,
            textColor=colors.HexColor('#2c3e50'),
            spaceAfter=12,
            spaceBefore=20,
            fontName='Helvetica-Bold',
        ),
        "subheading": ParagraphStyle(
            'CustomSubheading',
            parent=base['Heading3'],
            fontSize=13,
            textColor=colors.HexColor('#34495e'),
            spaceAfter=8,
            spaceBefore=12,
            fontName='Helvetica-Bold',
        ),
        "body": ParagraphStyle(
            'CustomBody',
            parent=base['BodyText'],
            fontSize=11,
            leading=14,
            textColor=colors.HexColor('#2c3e50'),
            alignment=TA_LEFT,
        ),
    }


def _metadata_table(results: List[Dict], interviewee_type: str) -> Table:
    """Build the title-page table (timestamp, interviewee type, counts)."""
    processed = sum(1 for r in results if r.get("quality_score", 0) > 0)
    rows = [
        ["Report Generated:", datetime.now().strftime("%B %d, %Y at %I:%M %p")],
        ["Interviewee Type:", interviewee_type],
        ["Total Transcripts:", str(len(results))],
        ["Successfully Processed:", str(processed)],
    ]
    table = Table(rows, colWidths=[2 * inch, 4 * inch])
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (0, -1), colors.HexColor('#ecf0f1')),
        ('TEXTCOLOR', (0, 0), (-1, -1), colors.HexColor('#2c3e50')),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
        ('TOPPADDING', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#bdc3c7')),
    ]))
    return table


def _transcript_flowables(result: Dict, styles: Dict[str, ParagraphStyle]) -> list:
    """Build the header, stats table and text paragraphs for one transcript."""
    flowables = []

    # Transcript header (escaped: file names may contain '&' or '<').
    title = f"{result['transcript_id']} - {result['file_name']}"
    flowables.append(Paragraph(_escape_markup(title), styles["subheading"]))

    # Stats. quality_score defaults to 0, matching the title-page count.
    stats_rows = [
        ["Quality Score:", f"{result.get('quality_score', 0):.2f}/1.00"],
        ["Word Count:", f"{result['word_count']:,}"],
    ]
    stats_table = Table(stats_rows, colWidths=[1.5 * inch, 2 * inch])
    stats_table.setStyle(TableStyle([
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 4),
        ('TOPPADDING', (0, 0), (-1, -1), 4),
    ]))
    flowables.append(stats_table)
    flowables.append(Spacer(1, 0.1 * inch))

    # Analysis text: the chunk limit is applied before blank chunks are
    # dropped, so blank chunks count toward it.
    text = result['full_text'] or ""
    for chunk in text.split('\n\n')[:_MAX_CHUNKS_PER_TRANSCRIPT]:
        chunk = chunk.strip()
        if not chunk:
            continue
        # Truncate the raw text first so the cut can never split an entity
        # such as "&amp;" produced by escaping.
        if len(chunk) > _MAX_CHUNK_CHARS:
            chunk = chunk[:_MAX_CHUNK_CHARS] + "..."
        flowables.append(Paragraph(_escape_markup(chunk), styles["body"]))
        flowables.append(Spacer(1, 0.1 * inch))

    flowables.append(Spacer(1, 0.2 * inch))
    return flowables


def generate_enhanced_pdf(
    summary: str,
    results: List[Dict],
    interviewee_type: str,
    processing_errors: List[str],
    path: str = "report.pdf",
) -> str:
    """Generate a formatted PDF report and return the file path.

    The report has a title page with a metadata table and executive summary,
    an optional "Processing Issues" page, and one section per transcript.
    All free text is escaped before being handed to ``Paragraph`` so that
    characters like ``&`` and ``<`` are rendered literally rather than being
    parsed as markup.

    Args:
        summary: Executive summary; paragraphs are separated by blank lines.
            ``None`` is treated as an empty summary.
        results: One dict per transcript. Each must provide
            ``transcript_id``, ``file_name``, ``word_count`` and
            ``full_text``; ``quality_score`` is optional and defaults to 0.
        interviewee_type: Shown in the title-page metadata table.
        processing_errors: Messages listed on the "Processing Issues" page;
            the page is omitted when this is empty.
        path: Destination file path.

    Returns:
        ``path``. If building the full report fails, a minimal fallback PDF
        containing the error message and the summary is written to the same
        path instead.
    """
    summary = summary or ""

    # Create document
    doc = SimpleDocTemplate(
        path,
        pagesize=letter,
        rightMargin=0.75 * inch,
        leftMargin=0.75 * inch,
        topMargin=0.75 * inch,
        bottomMargin=0.75 * inch,
    )
    styles = _build_styles()

    # Container for the 'Flowable' objects
    story = []

    # Title page
    story.append(Paragraph("Transcript Analysis Report", styles["title"]))
    story.append(Spacer(1, 0.2 * inch))
    story.append(_metadata_table(results, interviewee_type))
    story.append(Spacer(1, 0.3 * inch))

    # Executive Summary
    story.append(Paragraph("Executive Summary", styles["heading"]))
    story.append(Spacer(1, 0.1 * inch))
    for para in summary.split('\n\n'):
        para = para.strip()
        if para:
            story.append(Paragraph(_escape_markup(para), styles["body"]))
            story.append(Spacer(1, 0.1 * inch))

    # Processing errors section (if any)
    if processing_errors:
        story.append(PageBreak())
        story.append(Paragraph("Processing Issues", styles["heading"]))
        story.append(Spacer(1, 0.1 * inch))
        for error in processing_errors:
            story.append(Paragraph(f"• {_escape_markup(error)}", styles["body"]))
            story.append(Spacer(1, 0.05 * inch))

    # Individual transcript details
    story.append(PageBreak())
    story.append(Paragraph("Detailed Transcript Analysis", styles["heading"]))
    story.append(Spacer(1, 0.2 * inch))

    last_index = len(results) - 1
    for index, result in enumerate(results):
        story.extend(_transcript_flowables(result, styles))
        # Page break between transcripts (except last). Compared by position,
        # not by dict equality, so duplicate results still get their break.
        if index < last_index:
            story.append(PageBreak())

    # Build PDF
    try:
        doc.build(story)
        return path
    except Exception as e:
        # Deliberate best-effort boundary: any layout/markup failure falls
        # back to a minimal document rather than leaving no report at all.
        print(f"[PDF Error] Failed to generate PDF: {e}")

        # Create a minimal fallback PDF. The error text and summary are
        # escaped so the fallback cannot fail on the same markup problem.
        simple_doc = SimpleDocTemplate(path, pagesize=letter)
        simple_story = [
            Paragraph("Transcript Analysis Report", styles["title"]),
            Paragraph(
                _escape_markup(f"Error generating full report: {str(e)}"),
                styles["body"],
            ),
            Paragraph(_escape_markup(summary), styles["body"]),
        ]
        simple_doc.build(simple_story)
        return path