# vaani-cavp-engine / modules / report_generator.py
# Shaankar39's picture
# init: Vaani CAVP engine (CPU, accuracy-first — Whisper large-v3, spaCy trf)
# 7d5f092
"""PDF DIAGNOSTIC REPORT GENERATOR
Generates a parent-friendly PDF report with spectrograms, scores,
interference patterns, and remediation recommendations.
"""
from __future__ import annotations
import io
import logging
from datetime import datetime
from pathlib import Path
from typing import Any
import numpy as np
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def _generate_spectrogram_image(audio_path: str | Path) -> bytes | None:
    """Render a three-panel PNG (mel spectrogram, waveform, MFCC) for an audio file.

    The audio is loaded with librosa at 22050 Hz and drawn with matplotlib's
    non-interactive "Agg" backend.

    Args:
        audio_path: Path of the audio file to visualise.

    Returns:
        The PNG image as bytes, or ``None`` when the plotting libraries are
        missing or any step of loading/plotting fails (the failure is logged
        as a warning; this function is best-effort and does not raise).
    """
    try:
        import librosa
        import librosa.display
        import matplotlib

        matplotlib.use("Agg")
        import matplotlib.pyplot as plt
    except Exception as exc:
        logger.warning("Spectrogram image generation failed: %s", exc)
        return None

    fig = None
    try:
        y, sr = librosa.load(str(audio_path), sr=22050)
        fig, axes = plt.subplots(3, 1, figsize=(10, 8), tight_layout=True)

        # Mel spectrogram (power converted to dB relative to the peak).
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        mel_db = librosa.power_to_db(mel, ref=np.max)
        mel_img = librosa.display.specshow(
            mel_db, sr=sr, x_axis="time", y_axis="mel", ax=axes[0], cmap="magma"
        )
        axes[0].set_title("Mel Spectrogram", fontsize=12, fontweight="bold", color="#333")
        fig.colorbar(mel_img, ax=axes[0], format="%+2.0f dB")

        # Waveform
        librosa.display.waveshow(y, sr=sr, ax=axes[1], color="#0891b2")
        axes[1].set_title("Waveform", fontsize=12, fontweight="bold", color="#333")

        # MFCC
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfcc_img = librosa.display.specshow(mfcc, sr=sr, x_axis="time", ax=axes[2], cmap="coolwarm")
        axes[2].set_title("MFCC (Vocal Tract Shape)", fontsize=12, fontweight="bold", color="#333")
        fig.colorbar(mfcc_img, ax=axes[2])

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, bbox_inches="tight", facecolor="white")
        return buf.getvalue()
    except Exception as exc:
        logger.warning("Spectrogram image generation failed: %s", exc)
        return None
    finally:
        # Close the figure on every path; pyplot keeps figures alive until
        # they are closed, so a failure mid-plot would otherwise leak one.
        if fig is not None:
            plt.close(fig)
def _generate_formant_plot(formant_data: dict[str, Any]) -> bytes | None:
    """Render an F1/F2 vowel-space PNG: produced formants vs English targets.

    Args:
        formant_data: Mapping read for the keys ``"f1_trajectory"`` and
            ``"f2_trajectory"`` (sequences of formant values). At most the
            first 50 paired values are plotted.

    Returns:
        The PNG image as bytes, or ``None`` when matplotlib or the target
        table cannot be imported or plotting fails (the failure is logged as
        a warning; this function is best-effort and does not raise).
    """
    try:
        import math

        import matplotlib

        matplotlib.use("Agg")
        import matplotlib.pyplot as plt
        from modules.l1_targets import ENGLISH_VOWEL_FORMANTS
    except Exception as exc:
        logger.warning("Formant plot failed: %s", exc)
        return None

    fig = None
    try:
        data = formant_data or {}
        f1_raw = data.get("f1_trajectory")
        f2_raw = data.get("f2_trajectory")
        f1_traj = [] if f1_raw is None else list(f1_raw)
        f2_traj = [] if f2_raw is None else list(f2_raw)
        # scatter() needs equally long x/y, so pair up to the shorter trajectory.
        n_points = min(len(f1_traj), len(f2_traj), 50)

        fig, ax = plt.subplots(1, 1, figsize=(6, 5))
        x_values: list[Any] = []
        y_values: list[Any] = []
        if n_points:
            produced_f1 = f1_traj[:n_points]
            produced_f2 = f2_traj[:n_points]
            ax.scatter(produced_f2, produced_f1, alpha=0.3, s=8, color="#0891b2", label="Produced")
            x_values.extend(produced_f2)
            y_values.extend(produced_f1)

        # Plot English targets
        for vowel, (f1, f2) in ENGLISH_VOWEL_FORMANTS.items():
            ax.annotate(vowel, (f2, f1), fontsize=9, color="#ef4444", fontweight="bold",
                        ha="center", va="center",
                        bbox=dict(boxstyle="round,pad=0.2", facecolor="white", edgecolor="#ef4444", alpha=0.7))
            x_values.append(f2)
            y_values.append(f1)

        # Annotations do not take part in autoscaling and are not drawn when
        # their anchor point is outside the axes, so size the axes explicitly
        # to cover both the produced points and every target label.
        for set_limits, values in ((ax.set_xlim, x_values), (ax.set_ylim, y_values)):
            finite = [float(v) for v in values if math.isfinite(v)]
            if finite:
                low, high = min(finite), max(finite)
                pad = 0.05 * (high - low) or 1.0
                set_limits(low - pad, high + pad)

        ax.set_xlabel("F2 (Hz)", fontsize=11)
        ax.set_ylabel("F1 (Hz)", fontsize=11)
        ax.set_title("Vowel Space: Produced vs English Targets", fontsize=12, fontweight="bold")
        ax.invert_xaxis()
        ax.invert_yaxis()
        if n_points:
            # Only the scatter carries a label; legend() warns when there is none.
            ax.legend(fontsize=9)
        ax.grid(True, alpha=0.3)

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, bbox_inches="tight", facecolor="white")
        return buf.getvalue()
    except Exception as exc:
        logger.warning("Formant plot failed: %s", exc)
        return None
    finally:
        # Close the figure on every path so a failed plot does not leak it.
        if fig is not None:
            plt.close(fig)
def _profile_section(profile: dict[str, Any], key: str) -> dict[str, Any]:
    """Return ``profile[key]`` when it is a dict, otherwise an empty dict."""
    section = profile.get(key)
    return section if isinstance(section, dict) else {}


def _format_score(value: Any, scale: float = 1.0) -> str:
    """Format ``value * scale`` as ``"NN.N / 100"``; non-numeric values count as 0."""
    try:
        number = float(value) * scale
    except (TypeError, ValueError):
        number = 0.0
    return f"{number:.1f} / 100"


def generate_pdf_report(
    profile: dict[str, Any],
    audio_path: str | Path | None = None,
    student_name: str = "Student",
    student_id: str = "",
) -> bytes:
    """Generate a comprehensive PDF diagnostic report.

    The report contains a title block, student info, the transcript, a score
    summary table, optional voice plots, the detected L1 interference
    patterns, and recommendations for parents.

    Args:
        profile: Analysis result. Sections read here: ``transcription``,
            ``phoneme_analysis``, ``morpheme_boundary``, ``prosodic_profile``,
            ``connected_speech``, ``voice_quality``, ``feature_extraction``,
            ``l1_interference`` (or the older ``bhojpuri_interference``) and
            ``l1_display_name``. Missing or ``None`` sections and scores are
            treated as empty / 0.
        audio_path: Audio file used for the spectrogram page; when falsy the
            voice-visualisation plots are skipped.
        student_name: Name shown in the info table.
        student_id: ID shown in the info table ("N/A" when empty).

    Returns:
        The report as bytes. When reportlab cannot be imported, the result of
        ``_generate_simple_pdf`` is returned instead.
    """
    try:
        from reportlab.lib import colors
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
        from reportlab.lib.units import mm, cm
        from reportlab.platypus import (
            SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image,
            PageBreak, HRFlowable,
        )
    except ImportError:
        logger.warning("reportlab is not available; falling back to the plain-text report")
        return _generate_simple_pdf(profile, student_name, student_id, audio_path)

    from xml.sax.saxutils import escape

    def markup_safe(value: Any) -> str:
        # Paragraph parses its text as XML-like markup, so free text coming
        # from the profile must have &, < and > escaped before it is embedded.
        return escape("" if value is None else str(value))

    generated_at = datetime.now()

    buf = io.BytesIO()
    doc = SimpleDocTemplate(buf, pagesize=A4, topMargin=1.5 * cm, bottomMargin=1.5 * cm)
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle("Title2", parent=styles["Title"], fontSize=20, textColor=colors.HexColor("#0891b2"))
    heading_style = ParagraphStyle("Heading2b", parent=styles["Heading2"], textColor=colors.HexColor("#1e293b"))
    body_style = styles["Normal"]
    small_style = ParagraphStyle("Small", parent=body_style, fontSize=9, textColor=colors.grey)
    cell_style = ParagraphStyle("Cell", parent=body_style, fontSize=9, leading=11)
    elements: list[Any] = []

    # Title
    elements.append(Paragraph("Contrastive Acoustic Voice Profile", title_style))
    elements.append(Paragraph("Diagnostic Report", styles["Heading3"]))
    elements.append(Spacer(1, 5 * mm))
    elements.append(HRFlowable(width="100%", color=colors.HexColor("#0891b2"), thickness=2))
    elements.append(Spacer(1, 5 * mm))

    # Student info (plain strings in table cells are drawn verbatim, no escaping needed)
    transcription = _profile_section(profile, "transcription")
    info_data = [
        ["Student Name:", student_name, "Student ID:", student_id or "N/A"],
        ["Date:", generated_at.strftime("%B %d, %Y"), "Language:", str(transcription.get("language") or "N/A")],
    ]
    info_table = Table(info_data, colWidths=[80, 150, 80, 150])
    info_table.setStyle(TableStyle([
        ("FONTSIZE", (0, 0), (-1, -1), 10),
        ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
        ("FONTNAME", (2, 0), (2, -1), "Helvetica-Bold"),
        ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
    ]))
    elements.append(info_table)
    elements.append(Spacer(1, 8 * mm))

    # Transcript
    transcript = transcription.get("text") or ""
    if transcript:
        quote_style = ParagraphStyle(
            "Quote", parent=body_style, fontSize=11, textColor=colors.HexColor("#334155"), leftIndent=10
        )
        elements.append(Paragraph("What Was Said:", heading_style))
        elements.append(Paragraph(f'"{markup_safe(transcript)}"', quote_style))
        elements.append(Spacer(1, 5 * mm))

    # Score summary
    elements.append(Paragraph("Score Summary", heading_style))
    pa = _profile_section(profile, "phoneme_analysis")
    mb = _profile_section(profile, "morpheme_boundary")
    pp = _profile_section(profile, "prosodic_profile")
    cs = _profile_section(profile, "connected_speech")
    vq = _profile_section(profile, "voice_quality")
    # Prefer the generic key; fall back to the older Bhojpuri-specific one.
    l1_key = "l1_interference" if "l1_interference" in profile else "bhojpuri_interference"
    l1_data = _profile_section(profile, l1_key)
    l1_name = str(profile.get("l1_display_name") or l1_data.get("l1_display_name") or "L1")
    l1_score = l1_data.get("l1_interference_score", l1_data.get("bhojpuri_interference_score", 0))
    cognitive_load = mb.get("cognitive_load")
    cognitive_score = cognitive_load.get("score", 0) if isinstance(cognitive_load, dict) else 0

    score_data = [
        ["Measure", "Score", "What It Means"],
        ["Phoneme Accuracy", _format_score(pa.get("overall_accuracy", 0), 100), "How correctly English sounds are produced"],
        ["L1 Interference", _format_score(pa.get("interference_score", 0)), f"How much {l1_name} patterns affect English (lower = better)"],
        [f"{l1_name} Interference", _format_score(l1_score), f"Specific {l1_name} sound patterns detected"],
        ["Prosodic Score", _format_score(pp.get("prosodic_score", 0)), "Rhythm, stress, and intonation quality"],
        ["Fluency", _format_score(cs.get("fluency_score", 0)), "How smoothly words connect together"],
        ["Cognitive Load", _format_score(cognitive_score), "Mental effort during speech (lower = easier)"],
        ["Voice Quality", _format_score(vq.get("overall_quality_score", 0)), "Overall voice health and clarity"],
    ]
    score_table = Table(score_data, colWidths=[120, 80, 260])
    score_table.setStyle(TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#0891b2")),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 9),
        ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#e2e8f0")),
        ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f8fafc")]),
        ("BOTTOMPADDING", (0, 0), (-1, -1), 6),
        ("TOPPADDING", (0, 0), (-1, -1), 6),
    ]))
    elements.append(score_table)
    elements.append(Spacer(1, 8 * mm))

    # Spectrogram images
    if audio_path:
        spec_img = _generate_spectrogram_image(audio_path)
        if spec_img:
            elements.append(PageBreak())
            elements.append(Paragraph("Voice Visualization", heading_style))
            elements.append(Paragraph(
                "These images show your child's voice patterns. The colors represent energy at different frequencies.",
                small_style,
            ))
            elements.append(Spacer(1, 3 * mm))
            elements.append(Image(io.BytesIO(spec_img), width=16 * cm, height=12 * cm))
            elements.append(Spacer(1, 5 * mm))

        features = _profile_section(profile, "feature_extraction")
        formants = _profile_section(_profile_section(features, "parselmouth"), "formants")
        formant_img = _generate_formant_plot(formants)
        if formant_img:
            elements.append(Paragraph("Vowel Space", heading_style))
            elements.append(Paragraph(
                "Blue dots show where your child's vowels land. Red labels show where English vowels should be. "
                "The gap between them shows which vowels need practice.",
                small_style,
            ))
            elements.append(Spacer(1, 3 * mm))
            elements.append(Image(io.BytesIO(formant_img), width=12 * cm, height=10 * cm))

    # L1 interference details
    patterns = [pat for pat in (l1_data.get("detected_patterns") or []) if isinstance(pat, dict)]
    if patterns:
        elements.append(PageBreak())
        elements.append(Paragraph(f"{markup_safe(l1_name)} L1 Interference Patterns Detected", heading_style))
        elements.append(Paragraph(
            f"These are specific patterns where your child's {markup_safe(l1_name)} sounds are transferring into their English.",
            body_style,
        ))
        elements.append(Spacer(1, 3 * mm))
        for pat in patterns:
            pattern_title = str(pat.get("pattern") or "").replace("_", " ").title()
            # Values are wrapped in Paragraphs so long evidence/remediation
            # text wraps inside the cell instead of running off the page.
            pat_data = [
                ["Pattern:", Paragraph(markup_safe(pattern_title), cell_style)],
                ["Evidence:", Paragraph(markup_safe(pat.get("evidence", "")), cell_style)],
                ["Severity:", Paragraph(markup_safe(pat.get("severity", "")), cell_style)],
                ["What to Practice:", Paragraph(markup_safe(pat.get("remediation", "")), cell_style)],
            ]
            pat_table = Table(pat_data, colWidths=[100, 360])
            pat_table.setStyle(TableStyle([
                ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
                ("FONTSIZE", (0, 0), (-1, -1), 9),
                ("VALIGN", (0, 0), (-1, -1), "TOP"),
                ("BOTTOMPADDING", (0, 0), (-1, -1), 3),
                ("TOPPADDING", (0, 0), (-1, -1), 3),
                ("LINEBELOW", (0, -1), (-1, -1), 0.5, colors.HexColor("#e2e8f0")),
            ]))
            elements.append(pat_table)
            elements.append(Spacer(1, 4 * mm))

    # Recommendations
    elements.append(Paragraph("Recommendations for Parents", heading_style))
    recs = [
        "Practice the specific sounds listed above for 10-15 minutes daily.",
        "Focus on one sound pattern per week.",
        "Read English storybooks aloud together — this builds natural rhythm.",
        "Record your child reading and play it back — self-monitoring helps.",
        "Praise effort, not perfection — confidence is key to speaking improvement.",
    ]
    for pat in patterns:
        remediation = pat.get("remediation")
        # Several patterns may share one remediation; list each only once.
        if remediation and str(remediation) not in recs:
            recs.append(str(remediation))
    for i, rec in enumerate(recs, 1):
        elements.append(Paragraph(f"{i}. {markup_safe(rec)}", body_style))
        elements.append(Spacer(1, 2 * mm))

    elements.append(Spacer(1, 10 * mm))
    elements.append(HRFlowable(width="100%", color=colors.grey, thickness=0.5))
    elements.append(Paragraph(
        f"Generated by Contrastive Acoustic Voice Profiling System | {generated_at.strftime('%Y-%m-%d %H:%M')}",
        small_style,
    ))

    doc.build(elements)
    return buf.getvalue()
def _generate_simple_pdf(
profile: dict[str, Any],
student_name: str,
student_id: str,
audio_path: str | Path | None,
) -> bytes:
"""Fallback PDF generation without reportlab (plain text)."""
import json
lines = [
"CONTRASTIVE ACOUSTIC VOICE PROFILE - DIAGNOSTIC REPORT",
"=" * 55,
f"Student: {student_name}",
f"ID: {student_id}",
f"Date: {datetime.now().strftime('%B %d, %Y')}",
"",
"SCORES:",
f" Phoneme Accuracy: {profile.get('phoneme_analysis', {}).get('overall_accuracy', 0) * 100:.1f}",
f" L1 Interference: {profile.get('phoneme_analysis', {}).get('interference_score', 0):.1f}",
f" Prosodic Score: {profile.get('prosodic_profile', {}).get('prosodic_score', 0):.1f}",
f" Fluency: {profile.get('connected_speech', {}).get('fluency_score', 0):.1f}",
f" Voice Quality: {profile.get('voice_quality', {}).get('overall_quality_score', 0):.1f}",
"",
]
l1_fb = profile.get("l1_interference", profile.get("bhojpuri_interference", {}))
l1_fb_name = profile.get("l1_display_name", l1_fb.get("l1_display_name", "L1"))
lines.append(f"{l1_fb_name.upper()} INTERFERENCE:")
for pat in l1_fb.get("detected_patterns", []):
lines.append(f" - {pat.get('pattern', '')}: {pat.get('evidence', '')}")
lines.append(f" Practice: {pat.get('remediation', '')}")
return "\n".join(lines).encode("utf-8")