Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from reportlab.lib.pagesizes import letter, A4 | |
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
| from reportlab.lib.units import inch | |
| from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak | |
| from reportlab.lib import colors | |
| from reportlab.lib.enums import TA_CENTER, TA_LEFT | |
| from datetime import datetime | |
| from typing import List, Dict | |
| import os | |
def generate_csv(data, path="report.csv"):
    """Write *data* to a CSV file through ``generate_enhanced_csv``.

    Legacy entry point kept for backwards compatibility. It delegates with
    the interviewee type fixed to ``"Other"`` and returns whatever the
    delegate returns.
    """
    legacy_interviewee_type = "Other"
    return generate_enhanced_csv(data, legacy_interviewee_type, path)
def generate_enhanced_csv(
    data: List[Dict],
    interviewee_type: str,
    path: str = "report.csv"
) -> str:
    """Write *data* to a CSV file and return the path that was written.

    Args:
        data: One dict per row. When empty, a header-only file with the
            columns "Transcript ID" and "Status" is written instead.
        interviewee_type: Accepted for interface compatibility; this
            function does not use it.
        path: Destination file path.

    Returns:
        The ``path`` argument, unchanged.
    """
    if data:
        df = pd.DataFrame(data)
        # Put the well-known columns first (when present), then every other
        # column in its original order.
        priority_cols = ["Transcript ID", "File Name", "Quality Score", "Word Count"]
        leading = [col for col in priority_cols if col in df.columns]
        trailing = [col for col in df.columns if col not in priority_cols]
        df = df[leading + trailing]
    else:
        # No rows: still emit a header so consumers get a well-formed CSV.
        df = pd.DataFrame(columns=["Transcript ID", "Status"])

    # Single write path so empty and populated reports share one encoding
    # (UTF-8 with BOM); previously the empty report was written without it.
    df.to_csv(path, index=False, encoding='utf-8-sig')
    return path
def generate_pdf(summary, details, path="report.pdf"):
    """Render a single-transcript PDF through ``generate_enhanced_pdf``.

    Legacy entry point kept for backwards compatibility. ``details`` is
    wrapped in one placeholder result (fixed id, file name and quality
    score; word count taken from whitespace-splitting ``details``) and
    passed on with interviewee type ``"Other"`` and no processing errors.
    """
    word_total = len(details.split())
    placeholder_result = dict(
        transcript_id="Transcript 1",
        file_name="analysis.txt",
        full_text=details,
        quality_score=0.8,
        word_count=word_total,
    )
    return generate_enhanced_pdf(summary, [placeholder_result], "Other", [], path)
def _escape_paragraph_markup(text: str) -> str:
    """Escape ``&``, ``<`` and ``>`` so *text* is safe to hand to Paragraph."""
    # Function-scope import: stdlib only, keeps this block self-contained.
    from xml.sax.saxutils import escape

    return escape(text)


def generate_enhanced_pdf(
    summary: str,
    results: List[Dict],
    interviewee_type: str,
    processing_errors: List[str],
    path: str = "report.pdf"
) -> str:
    """Build the transcript analysis PDF report and return its path.

    The report contains a title, a metadata table, the executive summary,
    an optional "Processing Issues" page, and one section per transcript.

    Args:
        summary: Summary text; blank-line-separated paragraphs are rendered
            as separate paragraphs.
        results: One dict per transcript. Each must provide the keys
            ``transcript_id``, ``file_name``, ``quality_score``,
            ``word_count`` and ``full_text``.
        interviewee_type: Shown in the metadata table.
        processing_errors: Messages listed on the "Processing Issues" page;
            the page is omitted when this is empty.
        path: Destination file path.

    Returns:
        The ``path`` argument. If building the full document fails, a
        minimal fallback document is written to the same path instead.

    Raises:
        KeyError: If a result dict lacks one of the required keys.
    """
    # Caps that keep a single transcript from producing an overly long PDF.
    max_chunks = 10
    max_chunk_chars = 1000

    doc = SimpleDocTemplate(
        path,
        pagesize=letter,
        rightMargin=0.75*inch,
        leftMargin=0.75*inch,
        topMargin=0.75*inch,
        bottomMargin=0.75*inch
    )
    # Container for the flowable objects that make up the document.
    story = []

    styles = getSampleStyleSheet()
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        textColor=colors.HexColor('#1a1a1a'),
        spaceAfter=30,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold'
    )
    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading2'],
        fontSize=16,
        textColor=colors.HexColor('#2c3e50'),
        spaceAfter=12,
        spaceBefore=20,
        fontName='Helvetica-Bold'
    )
    subheading_style = ParagraphStyle(
        'CustomSubheading',
        parent=styles['Heading3'],
        fontSize=13,
        textColor=colors.HexColor('#34495e'),
        spaceAfter=8,
        spaceBefore=12,
        fontName='Helvetica-Bold'
    )
    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['BodyText'],
        fontSize=11,
        leading=14,
        textColor=colors.HexColor('#2c3e50'),
        alignment=TA_LEFT
    )

    # Title page
    story.append(Paragraph("Transcript Analysis Report", title_style))
    story.append(Spacer(1, 0.2*inch))

    # Metadata table. A transcript counts as processed when its quality
    # score is above zero; a missing score is treated as zero.
    processed_count = sum(1 for r in results if r.get("quality_score", 0) > 0)
    metadata = [
        ["Report Generated:", datetime.now().strftime("%B %d, %Y at %I:%M %p")],
        ["Interviewee Type:", interviewee_type],
        ["Total Transcripts:", str(len(results))],
        ["Successfully Processed:", str(processed_count)]
    ]
    metadata_table = Table(metadata, colWidths=[2*inch, 4*inch])
    metadata_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (0, -1), colors.HexColor('#ecf0f1')),
        ('TEXTCOLOR', (0, 0), (-1, -1), colors.HexColor('#2c3e50')),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
        ('TOPPADDING', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#bdc3c7'))
    ]))
    story.append(metadata_table)
    story.append(Spacer(1, 0.3*inch))

    # Executive summary, one Paragraph per blank-line-separated block.
    story.append(Paragraph("Executive Summary", heading_style))
    story.append(Spacer(1, 0.1*inch))
    for para in summary.split('\n\n'):
        stripped = para.strip()
        if stripped:
            # Paragraph parses its text as markup, so literal &, < and >
            # must be escaped.
            story.append(Paragraph(_escape_paragraph_markup(stripped), body_style))
            story.append(Spacer(1, 0.1*inch))

    # Processing errors section (only when there is something to report).
    if processing_errors:
        story.append(PageBreak())
        story.append(Paragraph("Processing Issues", heading_style))
        story.append(Spacer(1, 0.1*inch))
        for error in processing_errors:
            clean_error = _escape_paragraph_markup(error)
            story.append(Paragraph(f"• {clean_error}", body_style))
            story.append(Spacer(1, 0.05*inch))

    # Individual transcript details
    story.append(PageBreak())
    story.append(Paragraph("Detailed Transcript Analysis", heading_style))
    story.append(Spacer(1, 0.2*inch))

    last_index = len(results) - 1
    for index, result in enumerate(results):
        # Transcript header; ids and file names may contain markup characters.
        transcript_title = f"{result['transcript_id']} - {result['file_name']}"
        story.append(Paragraph(_escape_paragraph_markup(transcript_title), subheading_style))

        stats_data = [
            ["Quality Score:", f"{result['quality_score']:.2f}/1.00"],
            ["Word Count:", f"{result['word_count']:,}"]
        ]
        stats_table = Table(stats_data, colWidths=[1.5*inch, 2*inch])
        stats_table.setStyle(TableStyle([
            ('FONTSIZE', (0, 0), (-1, -1), 9),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 4),
            ('TOPPADDING', (0, 0), (-1, -1), 4),
        ]))
        story.append(stats_table)
        story.append(Spacer(1, 0.1*inch))

        # Analysis text: only the first few blank-line-separated chunks.
        for chunk in result['full_text'].split('\n\n')[:max_chunks]:
            stripped = chunk.strip()
            if not stripped:
                continue
            # Truncate BEFORE escaping so the cut can never land inside an
            # entity such as "&amp;".
            if len(stripped) > max_chunk_chars:
                stripped = stripped[:max_chunk_chars] + "..."
            story.append(Paragraph(_escape_paragraph_markup(stripped), body_style))
            story.append(Spacer(1, 0.1*inch))
        story.append(Spacer(1, 0.2*inch))

        # Page break between transcripts (except after the last). Compare by
        # position: comparing dicts by value would also skip the break for
        # any earlier result that happens to equal the last one.
        if index < last_index:
            story.append(PageBreak())

    # Build PDF
    try:
        doc.build(story)
        return path
    except Exception as e:
        print(f"[PDF Error] Failed to generate PDF: {e}")
        # Minimal fallback PDF. The error text and summary are escaped so the
        # fallback cannot itself fail on markup characters.
        simple_doc = SimpleDocTemplate(path, pagesize=letter)
        error_text = _escape_paragraph_markup(str(e))
        simple_story = [
            Paragraph("Transcript Analysis Report", title_style),
            Paragraph(f"Error generating full report: {error_text}", body_style),
            Paragraph(_escape_paragraph_markup(summary), body_style)
        ]
        simple_doc.build(simple_story)
        return path