Spaces:
Build error
Build error
| """PDF DIAGNOSTIC REPORT GENERATOR | |
| Generates a parent-friendly PDF report with spectrograms, scores, | |
| interference patterns, and remediation recommendations. | |
| """ | |
| from __future__ import annotations | |
| import io | |
| import logging | |
| from datetime import datetime | |
| from pathlib import Path | |
| from typing import Any | |
| import numpy as np | |
| logger = logging.getLogger(__name__) | |
def _generate_spectrogram_image(audio_path: str | Path) -> bytes | None:
    """Render a three-panel PNG (mel spectrogram, waveform, MFCC) for an audio file.

    The audio is loaded with librosa at 22050 Hz and drawn with matplotlib's
    non-interactive "Agg" backend.

    Args:
        audio_path: Path of the audio file to visualise.

    Returns:
        The PNG image as bytes, or ``None`` if anything fails (missing
        optional dependency, unreadable audio, rendering error). Failures are
        logged as warnings rather than raised, so the caller can simply omit
        the image.
    """
    try:
        # Heavy optional dependencies are imported lazily so the module can be
        # imported without them; a missing package just disables the image.
        import librosa
        import librosa.display
        import matplotlib

        matplotlib.use("Agg")
        import matplotlib.pyplot as plt

        y, sr = librosa.load(str(audio_path), sr=22050)
        fig, axes = plt.subplots(3, 1, figsize=(10, 8), tight_layout=True)
        try:
            # Mel spectrogram
            mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
            mel_db = librosa.power_to_db(mel, ref=np.max)
            img = librosa.display.specshow(
                mel_db, sr=sr, x_axis="time", y_axis="mel", ax=axes[0], cmap="magma"
            )
            axes[0].set_title("Mel Spectrogram", fontsize=12, fontweight="bold", color="#333")
            fig.colorbar(img, ax=axes[0], format="%+2.0f dB")

            # Waveform
            librosa.display.waveshow(y, sr=sr, ax=axes[1], color="#0891b2")
            axes[1].set_title("Waveform", fontsize=12, fontweight="bold", color="#333")

            # MFCC
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
            img2 = librosa.display.specshow(mfcc, sr=sr, x_axis="time", ax=axes[2], cmap="coolwarm")
            axes[2].set_title("MFCC (Vocal Tract Shape)", fontsize=12, fontweight="bold", color="#333")
            fig.colorbar(img2, ax=axes[2])

            buf = io.BytesIO()
            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight", facecolor="white")
        finally:
            # pyplot keeps every open figure alive in a global registry, so the
            # figure must be closed on the failure path as well as on success.
            plt.close(fig)
        return buf.getvalue()
    except Exception as exc:
        # Deliberately best-effort: the report is still useful without the image.
        logger.warning("Spectrogram image generation failed: %s", exc)
        return None
def _generate_formant_plot(formant_data: dict[str, Any]) -> bytes | None:
    """Render an F1/F2 vowel-space plot as a PNG.

    Produced formant points (at most the first 50 F1/F2 pairs) are drawn as a
    scatter, and every entry of ``ENGLISH_VOWEL_FORMANTS`` is drawn as a
    labelled target. Both axes are inverted.

    Args:
        formant_data: Mapping read for the keys ``"f1_trajectory"`` and
            ``"f2_trajectory"``; missing keys are treated as empty.

    Returns:
        The PNG image as bytes, or ``None`` if anything fails (missing
        dependency, bad data, rendering error). Failures are logged as
        warnings rather than raised.
    """
    try:
        import matplotlib

        matplotlib.use("Agg")
        import matplotlib.pyplot as plt

        # Imported before the figure is created so that a missing targets
        # module cannot leave an orphaned figure behind.
        from modules.l1_targets import ENGLISH_VOWEL_FORMANTS

        fig, ax = plt.subplots(1, 1, figsize=(6, 5))
        try:
            f1_traj = formant_data.get("f1_trajectory", [])
            f2_traj = formant_data.get("f2_trajectory", [])
            has_points = bool(f1_traj and f2_traj)
            if has_points:
                # scatter() requires equally long x/y; trim to the common
                # length so a ragged pair of trajectories still plots.
                count = min(len(f1_traj), len(f2_traj), 50)
                ax.scatter(
                    f2_traj[:count], f1_traj[:count],
                    alpha=0.3, s=8, color="#0891b2", label="Produced",
                )

            # Plot English targets
            for vowel, (f1, f2) in ENGLISH_VOWEL_FORMANTS.items():
                ax.annotate(
                    vowel, (f2, f1), fontsize=9, color="#ef4444", fontweight="bold",
                    ha="center", va="center",
                    bbox=dict(boxstyle="round,pad=0.2", facecolor="white", edgecolor="#ef4444", alpha=0.7),
                )

            ax.set_xlabel("F2 (Hz)", fontsize=11)
            ax.set_ylabel("F1 (Hz)", fontsize=11)
            ax.set_title("Vowel Space: Produced vs English Targets", fontsize=12, fontweight="bold")
            ax.invert_xaxis()
            ax.invert_yaxis()
            if has_points:
                # The scatter is the only labelled artist; a legend without it
                # would be empty and only trigger a matplotlib warning.
                ax.legend(fontsize=9)
            ax.grid(True, alpha=0.3)

            buf = io.BytesIO()
            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight", facecolor="white")
        finally:
            # Close on every path; pyplot otherwise keeps the figure alive.
            plt.close(fig)
        return buf.getvalue()
    except Exception as exc:
        # Deliberately best-effort: the report is still useful without the plot.
        logger.warning("Formant plot failed: %s", exc)
        return None
def generate_pdf_report(
    profile: dict[str, Any],
    audio_path: str | Path | None = None,
    student_name: str = "Student",
    student_id: str = "",
) -> bytes:
    """Generate a comprehensive PDF diagnostic report.

    The report contains a title block, student info, the transcript, a score
    summary table, optional voice visualisations, the detected L1 interference
    patterns and a list of recommendations for parents.

    Args:
        profile: Analysis result. Sections read here: ``transcription``,
            ``phoneme_analysis``, ``morpheme_boundary``, ``prosodic_profile``,
            ``connected_speech``, ``voice_quality``, ``feature_extraction`` and
            ``l1_interference`` (falling back to ``bhojpuri_interference``).
            Missing or ``None`` sections and scores are rendered as defaults.
        audio_path: Optional audio file; when given, spectrogram and vowel
            space images are attempted and added if they can be rendered.
        student_name: Name shown in the info table.
        student_id: Identifier shown in the info table ("N/A" when empty).

    Returns:
        The PDF document as bytes. If reportlab is not installed, the result
        of ``_generate_simple_pdf`` is returned instead.
    """
    try:
        from reportlab.lib import colors
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
        from reportlab.lib.units import mm, cm
        from reportlab.platypus import (
            SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image,
            PageBreak, HRFlowable,
        )
    except ImportError:
        return _generate_simple_pdf(profile, student_name, student_id, audio_path)

    # Paragraph parses its text as XML-like markup, so any dynamic text
    # (transcripts, language names, remediation advice) must be escaped or a
    # stray "<" / "&" breaks the build or garbles the output.
    from xml.sax.saxutils import escape

    def _section(key: str) -> dict[str, Any]:
        """Return a profile section, treating a missing/None section as empty."""
        return profile.get(key) or {}

    def _num(value: Any) -> Any:
        """Map a missing (None) score to 0 so numeric formatting cannot fail."""
        return 0 if value is None else value

    buf = io.BytesIO()
    doc = SimpleDocTemplate(buf, pagesize=A4, topMargin=1.5 * cm, bottomMargin=1.5 * cm)
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle("Title2", parent=styles["Title"], fontSize=20, textColor=colors.HexColor("#0891b2"))
    heading_style = ParagraphStyle("Heading2b", parent=styles["Heading2"], textColor=colors.HexColor("#1e293b"))
    body_style = styles["Normal"]
    small_style = ParagraphStyle("Small", parent=body_style, fontSize=9, textColor=colors.grey)
    elements: list[Any] = []

    # Title
    elements.append(Paragraph("Contrastive Acoustic Voice Profile", title_style))
    elements.append(Paragraph("Diagnostic Report", styles["Heading3"]))
    elements.append(Spacer(1, 5 * mm))
    elements.append(HRFlowable(width="100%", color=colors.HexColor("#0891b2"), thickness=2))
    elements.append(Spacer(1, 5 * mm))

    # Student info (plain-string table cells are drawn literally: no escaping)
    transcription = _section("transcription")
    info_data = [
        ["Student Name:", student_name, "Student ID:", student_id or "N/A"],
        ["Date:", datetime.now().strftime("%B %d, %Y"), "Language:", transcription.get("language", "N/A")],
    ]
    info_table = Table(info_data, colWidths=[80, 150, 80, 150])
    info_table.setStyle(TableStyle([
        ("FONTSIZE", (0, 0), (-1, -1), 10),
        ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
        ("FONTNAME", (2, 0), (2, -1), "Helvetica-Bold"),
        ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
    ]))
    elements.append(info_table)
    elements.append(Spacer(1, 8 * mm))

    # Transcript
    transcript = transcription.get("text", "")
    if transcript:
        elements.append(Paragraph("What Was Said:", heading_style))
        quote_style = ParagraphStyle(
            "Quote", parent=body_style, fontSize=11,
            textColor=colors.HexColor("#334155"), leftIndent=10,
        )
        elements.append(Paragraph(f'"{escape(str(transcript))}"', quote_style))
        elements.append(Spacer(1, 5 * mm))

    # Score summary
    elements.append(Paragraph("Score Summary", heading_style))
    pa = _section("phoneme_analysis")
    mb = _section("morpheme_boundary")
    pp = _section("prosodic_profile")
    cs = _section("connected_speech")
    vq = _section("voice_quality")
    # Newer profiles use "l1_interference"; "bhojpuri_interference" is the
    # fallback key consulted only when the former is absent.
    l1_data = profile.get("l1_interference", profile.get("bhojpuri_interference", {})) or {}
    l1_name = profile.get("l1_display_name", l1_data.get("l1_display_name", "L1"))
    l1_name_markup = escape(str(l1_name))  # for use inside Paragraph text only
    l1_score = l1_data.get("l1_interference_score", l1_data.get("bhojpuri_interference_score", 0))
    cognitive_load = mb.get("cognitive_load") or {}
    score_data = [
        ["Measure", "Score", "What It Means"],
        ["Phoneme Accuracy", f"{(_num(pa.get('overall_accuracy', 0)) * 100):.1f} / 100", "How correctly English sounds are produced"],
        ["L1 Interference", f"{_num(pa.get('interference_score', 0)):.1f} / 100", f"How much {l1_name} patterns affect English (lower = better)"],
        [f"{l1_name} Interference", f"{_num(l1_score):.1f} / 100", f"Specific {l1_name} sound patterns detected"],
        ["Prosodic Score", f"{_num(pp.get('prosodic_score', 0)):.1f} / 100", "Rhythm, stress, and intonation quality"],
        ["Fluency", f"{_num(cs.get('fluency_score', 0)):.1f} / 100", "How smoothly words connect together"],
        ["Cognitive Load", f"{_num(cognitive_load.get('score', 0)):.1f} / 100", "Mental effort during speech (lower = easier)"],
        ["Voice Quality", f"{_num(vq.get('overall_quality_score', 0)):.1f} / 100", "Overall voice health and clarity"],
    ]
    score_table = Table(score_data, colWidths=[120, 80, 260])
    score_table.setStyle(TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#0891b2")),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 9),
        ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#e2e8f0")),
        ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f8fafc")]),
        ("BOTTOMPADDING", (0, 0), (-1, -1), 6),
        ("TOPPADDING", (0, 0), (-1, -1), 6),
    ]))
    elements.append(score_table)
    elements.append(Spacer(1, 8 * mm))

    # Spectrogram images
    if audio_path:
        spec_img = _generate_spectrogram_image(audio_path)
        if spec_img:
            elements.append(PageBreak())
            elements.append(Paragraph("Voice Visualization", heading_style))
            elements.append(Paragraph(
                "These images show your child's voice patterns. The colors represent energy at different frequencies.",
                small_style,
            ))
            elements.append(Spacer(1, 3 * mm))
            elements.append(Image(io.BytesIO(spec_img), width=16 * cm, height=12 * cm))
            elements.append(Spacer(1, 5 * mm))

        # NOTE(review): the original indentation was lost; the vowel-space plot
        # is assumed to belong to this audio-dependent section - confirm.
        formants = ((_section("feature_extraction").get("parselmouth") or {}).get("formants")) or {}
        formant_img = _generate_formant_plot(formants)
        if formant_img:
            elements.append(Paragraph("Vowel Space", heading_style))
            elements.append(Paragraph(
                "Blue dots show where your child's vowels land. Red labels show where English vowels should be. "
                "The gap between them shows which vowels need practice.",
                small_style,
            ))
            elements.append(Spacer(1, 3 * mm))
            elements.append(Image(io.BytesIO(formant_img), width=12 * cm, height=10 * cm))

    # L1 interference details
    detected_patterns = l1_data.get("detected_patterns") or []
    if detected_patterns:
        elements.append(PageBreak())
        elements.append(Paragraph(f"{l1_name_markup} L1 Interference Patterns Detected", heading_style))
        elements.append(Paragraph(
            f"These are specific patterns where your child's {l1_name_markup} sounds are transferring into their English.",
            body_style,
        ))
        elements.append(Spacer(1, 3 * mm))
        for pat in detected_patterns:
            pat_data = [
                ["Pattern:", pat.get("pattern", "").replace("_", " ").title()],
                ["Evidence:", pat.get("evidence", "")],
                ["Severity:", pat.get("severity", "")],
                ["What to Practice:", pat.get("remediation", "")],
            ]
            pat_table = Table(pat_data, colWidths=[100, 360])
            pat_table.setStyle(TableStyle([
                ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
                ("FONTSIZE", (0, 0), (-1, -1), 9),
                ("BOTTOMPADDING", (0, 0), (-1, -1), 3),
                ("TOPPADDING", (0, 0), (-1, -1), 3),
                ("LINEBELOW", (0, -1), (-1, -1), 0.5, colors.HexColor("#e2e8f0")),
            ]))
            elements.append(pat_table)
            elements.append(Spacer(1, 4 * mm))

    # Recommendations: fixed general advice followed by per-pattern remediation
    elements.append(Paragraph("Recommendations for Parents", heading_style))
    recs = [
        "Practice the specific sounds listed above for 10-15 minutes daily.",
        "Focus on one sound pattern per week.",
        "Read English storybooks aloud together — this builds natural rhythm.",
        "Record your child reading and play it back — self-monitoring helps.",
        "Praise effort, not perfection — confidence is key to speaking improvement.",
    ]
    recs.extend(pat["remediation"] for pat in detected_patterns if pat.get("remediation"))
    for i, rec in enumerate(recs, 1):
        elements.append(Paragraph(f"{i}. {escape(str(rec))}", body_style))
        elements.append(Spacer(1, 2 * mm))

    # Footer
    elements.append(Spacer(1, 10 * mm))
    elements.append(HRFlowable(width="100%", color=colors.grey, thickness=0.5))
    elements.append(Paragraph(
        f"Generated by Contrastive Acoustic Voice Profiling System | {datetime.now().strftime('%Y-%m-%d %H:%M')}",
        small_style,
    ))

    doc.build(elements)
    return buf.getvalue()
| def _generate_simple_pdf( | |
| profile: dict[str, Any], | |
| student_name: str, | |
| student_id: str, | |
| audio_path: str | Path | None, | |
| ) -> bytes: | |
| """Fallback PDF generation without reportlab (plain text).""" | |
| import json | |
| lines = [ | |
| "CONTRASTIVE ACOUSTIC VOICE PROFILE - DIAGNOSTIC REPORT", | |
| "=" * 55, | |
| f"Student: {student_name}", | |
| f"ID: {student_id}", | |
| f"Date: {datetime.now().strftime('%B %d, %Y')}", | |
| "", | |
| "SCORES:", | |
| f" Phoneme Accuracy: {profile.get('phoneme_analysis', {}).get('overall_accuracy', 0) * 100:.1f}", | |
| f" L1 Interference: {profile.get('phoneme_analysis', {}).get('interference_score', 0):.1f}", | |
| f" Prosodic Score: {profile.get('prosodic_profile', {}).get('prosodic_score', 0):.1f}", | |
| f" Fluency: {profile.get('connected_speech', {}).get('fluency_score', 0):.1f}", | |
| f" Voice Quality: {profile.get('voice_quality', {}).get('overall_quality_score', 0):.1f}", | |
| "", | |
| ] | |
| l1_fb = profile.get("l1_interference", profile.get("bhojpuri_interference", {})) | |
| l1_fb_name = profile.get("l1_display_name", l1_fb.get("l1_display_name", "L1")) | |
| lines.append(f"{l1_fb_name.upper()} INTERFERENCE:") | |
| for pat in l1_fb.get("detected_patterns", []): | |
| lines.append(f" - {pat.get('pattern', '')}: {pat.get('evidence', '')}") | |
| lines.append(f" Practice: {pat.get('remediation', '')}") | |
| return "\n".join(lines).encode("utf-8") | |