Spaces:

Shaankar39
/

vaani-cavp-engine

Build error

App Files Files Community

vaani-cavp-engine / modules /report_generator.py

Shaankar39

init: Vaani CAVP engine (CPU, accuracy-first — Whisper large-v3, spaCy trf)

7d5f092 about 1 month ago

raw

history blame contribute delete

13.5 kB

	"""PDF DIAGNOSTIC REPORT GENERATOR
	Generates a parent-friendly PDF report with spectrograms, scores,
	interference patterns, and remediation recommendations.
	"""

	from __future__ import annotations

	import io
	import logging
	from datetime import datetime
	from pathlib import Path
	from typing import Any

	import numpy as np

	logger = logging.getLogger(__name__)


	def _generate_spectrogram_image(audio_path: str | Path) -> bytes | None:
	    """Render a three-panel PNG (mel spectrogram, waveform, MFCC) for a recording.

	    The plotting stack (librosa, matplotlib) is imported lazily inside the
	    function, so a missing dependency is reported like any other failure.

	    Args:
	        audio_path: Location of the audio file; passed to ``librosa.load``
	            as a string with ``sr=22050``.

	    Returns:
	        The PNG image as bytes, or ``None`` when anything fails (the error is
	        logged as a warning, never raised).
	    """
	    try:
	        import librosa
	        import librosa.display
	        import matplotlib
	        matplotlib.use("Agg")  # select the Agg backend before pyplot is imported
	        import matplotlib.pyplot as plt

	        y, sr = librosa.load(str(audio_path), sr=22050)
	        fig, axes = plt.subplots(3, 1, figsize=(10, 8), tight_layout=True)
	        try:
	            # Mel spectrogram
	            mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
	            mel_db = librosa.power_to_db(mel, ref=np.max)
	            img = librosa.display.specshow(
	                mel_db, sr=sr, x_axis="time", y_axis="mel", ax=axes[0], cmap="magma"
	            )
	            axes[0].set_title("Mel Spectrogram", fontsize=12, fontweight="bold", color="#333")
	            fig.colorbar(img, ax=axes[0], format="%+2.0f dB")

	            # Waveform
	            librosa.display.waveshow(y, sr=sr, ax=axes[1], color="#0891b2")
	            axes[1].set_title("Waveform", fontsize=12, fontweight="bold", color="#333")

	            # MFCC
	            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
	            img2 = librosa.display.specshow(mfcc, sr=sr, x_axis="time", ax=axes[2], cmap="coolwarm")
	            axes[2].set_title("MFCC (Vocal Tract Shape)", fontsize=12, fontweight="bold", color="#333")
	            fig.colorbar(img2, ax=axes[2])

	            buf = io.BytesIO()
	            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight", facecolor="white")
	            return buf.getvalue()
	        finally:
	            # Close the figure on the error path too; pyplot keeps every open
	            # figure registered until it is closed explicitly.
	            plt.close(fig)
	    except Exception as exc:
	        # Best effort by design: the report is still produced without the image.
	        logger.warning("Spectrogram image generation failed: %s", exc)
	        return None


	def _generate_formant_plot(formant_data: dict[str, Any]) -> bytes | None:
	    """Render the F1/F2 vowel-space plot: produced formants against English targets.

	    Args:
	        formant_data: Mapping read for ``f1_trajectory`` and ``f2_trajectory``
	            (sequences of formant values). A falsy mapping or missing
	            trajectories yield a plot that shows only the target labels.

	    Returns:
	        The PNG image as bytes, or ``None`` when anything fails (the error is
	        logged as a warning, never raised).
	    """
	    try:
	        import matplotlib
	        matplotlib.use("Agg")  # select the Agg backend before pyplot is imported
	        import matplotlib.pyplot as plt

	        data = formant_data or {}
	        f1_traj = data.get("f1_trajectory")
	        f2_traj = data.get("f2_trajectory")
	        if f1_traj is None or f2_traj is None:
	            n_points = 0
	        else:
	            # Use len() rather than truthiness so array inputs work, and cut both
	            # trajectories to a common length so scatter() gets matching x/y sizes.
	            n_points = min(len(f1_traj), len(f2_traj), 50)

	        fig, ax = plt.subplots(1, 1, figsize=(6, 5))
	        try:
	            if n_points:
	                ax.scatter(
	                    f2_traj[:n_points], f1_traj[:n_points],
	                    alpha=0.3, s=8, color="#0891b2", label="Produced",
	                )

	            # Plot English targets
	            from modules.l1_targets import ENGLISH_VOWEL_FORMANTS
	            for vowel, (f1, f2) in ENGLISH_VOWEL_FORMANTS.items():
	                ax.annotate(
	                    vowel, (f2, f1), fontsize=9, color="#ef4444", fontweight="bold",
	                    ha="center", va="center",
	                    bbox=dict(boxstyle="round,pad=0.2", facecolor="white", edgecolor="#ef4444", alpha=0.7),
	                )

	            ax.set_xlabel("F2 (Hz)", fontsize=11)
	            ax.set_ylabel("F1 (Hz)", fontsize=11)
	            ax.set_title("Vowel Space: Produced vs English Targets", fontsize=12, fontweight="bold")
	            ax.invert_xaxis()
	            ax.invert_yaxis()
	            if n_points:
	                # A legend without any labelled artist only triggers a warning.
	                ax.legend(fontsize=9)
	            ax.grid(True, alpha=0.3)

	            buf = io.BytesIO()
	            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight", facecolor="white")
	            return buf.getvalue()
	        finally:
	            # Close the figure on the error path too; pyplot keeps every open
	            # figure registered until it is closed explicitly.
	            plt.close(fig)
	    except Exception as exc:
	        # Best effort by design: the report is still produced without the plot.
	        logger.warning("Formant plot failed: %s", exc)
	        return None


	def generate_pdf_report(
	    profile: dict[str, Any],
	    audio_path: str | Path | None = None,
	    student_name: str = "Student",
	    student_id: str = "",
	) -> bytes:
	    """Build the parent-facing diagnostic report and return it as PDF bytes.

	    The report contains a student/date header, the transcript, a score summary
	    table, optional voice visualisations, the detected L1 interference
	    patterns and a numbered list of recommendations.

	    Args:
	        profile: Analysis results. Sections read here: ``transcription``,
	            ``phoneme_analysis``, ``morpheme_boundary``, ``prosodic_profile``,
	            ``connected_speech``, ``voice_quality``, ``feature_extraction`` and
	            ``l1_interference`` (``bhojpuri_interference`` is the fallback key).
	            Missing or non-dict sections are treated as empty; missing or
	            non-numeric scores are shown as 0.0.
	        audio_path: Recording to visualise. When falsy, the visualisation
	            section is skipped.
	        student_name: Name printed in the header table.
	        student_id: Identifier printed in the header table ("N/A" when empty).

	    Returns:
	        The rendered PDF. When reportlab cannot be imported, the plain-text
	        output of ``_generate_simple_pdf`` is returned instead.
	    """
	    try:
	        from reportlab.lib import colors
	        from reportlab.lib.pagesizes import A4
	        from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
	        from reportlab.lib.units import cm, mm
	        from reportlab.platypus import (
	            HRFlowable, Image, PageBreak, Paragraph, SimpleDocTemplate,
	            Spacer, Table, TableStyle,
	        )
	    except ImportError:
	        return _generate_simple_pdf(profile, student_name, student_id, audio_path)

	    from xml.sax.saxutils import escape

	    def section(source: Any, key: str) -> dict[str, Any]:
	        """Return ``source[key]`` when it is a dict, otherwise an empty dict."""
	        value = source.get(key) if isinstance(source, dict) else None
	        return value if isinstance(value, dict) else {}

	    def score(value: Any, scale: float = 1.0) -> str:
	        """Format a score cell; anything that is not a number counts as 0."""
	        try:
	            number = float(value) * scale
	        except (TypeError, ValueError):
	            number = 0.0
	        return f"{number:.1f} / 100"

	    def plain(value: Any) -> str:
	        """Turn a possibly missing value into display text."""
	        return "" if value is None else str(value)

	    def markup_safe(value: Any) -> str:
	        """Escape ``&``, ``<`` and ``>`` so free text is not parsed as Paragraph markup."""
	        return escape(plain(value))

	    generated_at = datetime.now()

	    buf = io.BytesIO()
	    # 2 cm side margins leave room for the 460 pt wide tables and the 16 cm wide
	    # image on A4; the library's default side margin (1 inch) is too narrow.
	    doc = SimpleDocTemplate(
	        buf, pagesize=A4,
	        topMargin=1.5 * cm, bottomMargin=1.5 * cm,
	        leftMargin=2 * cm, rightMargin=2 * cm,
	    )

	    styles = getSampleStyleSheet()
	    title_style = ParagraphStyle("Title2", parent=styles["Title"], fontSize=20, textColor=colors.HexColor("#0891b2"))
	    heading_style = ParagraphStyle("Heading2b", parent=styles["Heading2"], textColor=colors.HexColor("#1e293b"))
	    body_style = styles["Normal"]
	    small_style = ParagraphStyle("Small", parent=body_style, fontSize=9, textColor=colors.grey)
	    quote_style = ParagraphStyle(
	        "Quote", parent=body_style, fontSize=11, textColor=colors.HexColor("#334155"), leftIndent=10
	    )
	    cell_style = ParagraphStyle("PatternCell", parent=body_style, fontSize=9, leading=11)

	    elements: list[Any] = []

	    # Title
	    elements.append(Paragraph("Contrastive Acoustic Voice Profile", title_style))
	    elements.append(Paragraph("Diagnostic Report", styles["Heading3"]))
	    elements.append(Spacer(1, 5 * mm))
	    elements.append(HRFlowable(width="100%", color=colors.HexColor("#0891b2"), thickness=2))
	    elements.append(Spacer(1, 5 * mm))

	    # Student info (plain strings in table cells are drawn verbatim, so no escaping here)
	    transcription = section(profile, "transcription")
	    info_data = [
	        ["Student Name:", student_name, "Student ID:", student_id or "N/A"],
	        ["Date:", generated_at.strftime("%B %d, %Y"), "Language:", transcription.get("language") or "N/A"],
	    ]
	    info_table = Table(info_data, colWidths=[80, 150, 80, 150])
	    info_table.setStyle(TableStyle([
	        ("FONTSIZE", (0, 0), (-1, -1), 10),
	        ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
	        ("FONTNAME", (2, 0), (2, -1), "Helvetica-Bold"),
	        ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
	    ]))
	    elements.append(info_table)
	    elements.append(Spacer(1, 8 * mm))

	    # Transcript
	    transcript = transcription.get("text", "")
	    if transcript:
	        elements.append(Paragraph("What Was Said:", heading_style))
	        elements.append(Paragraph(f'"{markup_safe(transcript)}"', quote_style))
	        elements.append(Spacer(1, 5 * mm))

	    # Score summary
	    elements.append(Paragraph("Score Summary", heading_style))
	    pa = section(profile, "phoneme_analysis")
	    mb = section(profile, "morpheme_boundary")
	    pp = section(profile, "prosodic_profile")
	    cs = section(profile, "connected_speech")
	    vq = section(profile, "voice_quality")
	    l1_data = profile.get("l1_interference", profile.get("bhojpuri_interference", {}))
	    if not isinstance(l1_data, dict):
	        l1_data = section(profile, "bhojpuri_interference")
	    l1_name = plain(profile.get("l1_display_name", l1_data.get("l1_display_name", "L1"))) or "L1"
	    l1_score = l1_data.get("l1_interference_score", l1_data.get("bhojpuri_interference_score", 0))
	    patterns = [pat for pat in (l1_data.get("detected_patterns") or []) if isinstance(pat, dict)]

	    score_data = [
	        ["Measure", "Score", "What It Means"],
	        ["Phoneme Accuracy", score(pa.get("overall_accuracy", 0), 100), "How correctly English sounds are produced"],
	        ["L1 Interference", score(pa.get("interference_score", 0)), f"How much {l1_name} patterns affect English (lower = better)"],
	        [f"{l1_name} Interference", score(l1_score), f"Specific {l1_name} sound patterns detected"],
	        ["Prosodic Score", score(pp.get("prosodic_score", 0)), "Rhythm, stress, and intonation quality"],
	        ["Fluency", score(cs.get("fluency_score", 0)), "How smoothly words connect together"],
	        ["Cognitive Load", score(section(mb, "cognitive_load").get("score", 0)), "Mental effort during speech (lower = easier)"],
	        ["Voice Quality", score(vq.get("overall_quality_score", 0)), "Overall voice health and clarity"],
	    ]
	    score_table = Table(score_data, colWidths=[120, 80, 260])
	    score_table.setStyle(TableStyle([
	        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#0891b2")),
	        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
	        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
	        ("FONTSIZE", (0, 0), (-1, -1), 9),
	        ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#e2e8f0")),
	        ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f8fafc")]),
	        ("BOTTOMPADDING", (0, 0), (-1, -1), 6),
	        ("TOPPADDING", (0, 0), (-1, -1), 6),
	    ]))
	    elements.append(score_table)
	    elements.append(Spacer(1, 8 * mm))

	    # Spectrogram images
	    # NOTE(review): the nesting of this section was reconstructed from a source
	    # whose indentation was lost; the vowel-space plot is kept under the
	    # audio_path guard here -- confirm against the repository copy.
	    if audio_path:
	        spec_img = _generate_spectrogram_image(audio_path)
	        if spec_img:
	            elements.append(PageBreak())
	            elements.append(Paragraph("Voice Visualization", heading_style))
	            elements.append(Paragraph(
	                "These images show your child's voice patterns. The colors represent energy at different frequencies.",
	                small_style,
	            ))
	            elements.append(Spacer(1, 3 * mm))
	            elements.append(Image(io.BytesIO(spec_img), width=16 * cm, height=12 * cm))
	            elements.append(Spacer(1, 5 * mm))

	        formants = section(section(section(profile, "feature_extraction"), "parselmouth"), "formants")
	        formant_img = _generate_formant_plot(formants)
	        if formant_img:
	            elements.append(Paragraph("Vowel Space", heading_style))
	            elements.append(Paragraph(
	                "Blue dots show where your child's vowels land. Red labels show where English vowels should be. "
	                "The gap between them shows which vowels need practice.",
	                small_style,
	            ))
	            elements.append(Spacer(1, 3 * mm))
	            elements.append(Image(io.BytesIO(formant_img), width=12 * cm, height=10 * cm))

	    # L1 interference details
	    if patterns:
	        elements.append(PageBreak())
	        elements.append(Paragraph(f"{markup_safe(l1_name)} L1 Interference Patterns Detected", heading_style))
	        elements.append(Paragraph(
	            f"These are specific patterns where your child's {markup_safe(l1_name)} sounds are transferring into their English.",
	            body_style,
	        ))
	        elements.append(Spacer(1, 3 * mm))

	        for pat in patterns:
	            # Values are wrapped in Paragraphs so long evidence/remediation text
	            # wraps inside the column instead of running past the cell edge.
	            pat_data = [
	                ["Pattern:", Paragraph(markup_safe(plain(pat.get("pattern", "")).replace("_", " ").title()), cell_style)],
	                ["Evidence:", Paragraph(markup_safe(pat.get("evidence", "")), cell_style)],
	                ["Severity:", Paragraph(markup_safe(pat.get("severity", "")), cell_style)],
	                ["What to Practice:", Paragraph(markup_safe(pat.get("remediation", "")), cell_style)],
	            ]
	            pat_table = Table(pat_data, colWidths=[100, 360])
	            pat_table.setStyle(TableStyle([
	                ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
	                ("FONTSIZE", (0, 0), (-1, -1), 9),
	                ("VALIGN", (0, 0), (-1, -1), "TOP"),
	                ("BOTTOMPADDING", (0, 0), (-1, -1), 3),
	                ("TOPPADDING", (0, 0), (-1, -1), 3),
	                ("LINEBELOW", (0, -1), (-1, -1), 0.5, colors.HexColor("#e2e8f0")),
	            ]))
	            elements.append(pat_table)
	            elements.append(Spacer(1, 4 * mm))

	    # Recommendations
	    elements.append(Paragraph("Recommendations for Parents", heading_style))
	    recs = [
	        "Practice the specific sounds listed above for 10-15 minutes daily.",
	        "Focus on one sound pattern per week.",
	        "Read English storybooks aloud together — this builds natural rhythm.",
	        "Record your child reading and play it back — self-monitoring helps.",
	        "Praise effort, not perfection — confidence is key to speaking improvement.",
	    ]
	    recs.extend(plain(pat["remediation"]) for pat in patterns if pat.get("remediation"))

	    for i, rec in enumerate(recs, 1):
	        elements.append(Paragraph(f"{i}. {markup_safe(rec)}", body_style))
	        elements.append(Spacer(1, 2 * mm))

	    elements.append(Spacer(1, 10 * mm))
	    elements.append(HRFlowable(width="100%", color=colors.grey, thickness=0.5))
	    elements.append(Paragraph(
	        f"Generated by Contrastive Acoustic Voice Profiling System | {generated_at.strftime('%Y-%m-%d %H:%M')}",
	        small_style,
	    ))

	    doc.build(elements)
	    return buf.getvalue()


	def _generate_simple_pdf(
	profile: dict[str, Any],
	student_name: str,
	student_id: str,
	audio_path: str \| Path \| None,
	) -> bytes:
	"""Fallback PDF generation without reportlab (plain text)."""
	import json

	lines = [
	"CONTRASTIVE ACOUSTIC VOICE PROFILE - DIAGNOSTIC REPORT",
	"=" * 55,
	f"Student: {student_name}",
	f"ID: {student_id}",
	f"Date: {datetime.now().strftime('%B %d, %Y')}",
	"",
	"SCORES:",
	f" Phoneme Accuracy: {profile.get('phoneme_analysis', {}).get('overall_accuracy', 0) * 100:.1f}",
	f" L1 Interference: {profile.get('phoneme_analysis', {}).get('interference_score', 0):.1f}",
	f" Prosodic Score: {profile.get('prosodic_profile', {}).get('prosodic_score', 0):.1f}",
	f" Fluency: {profile.get('connected_speech', {}).get('fluency_score', 0):.1f}",
	f" Voice Quality: {profile.get('voice_quality', {}).get('overall_quality_score', 0):.1f}",
	"",
	]

	l1_fb = profile.get("l1_interference", profile.get("bhojpuri_interference", {}))
	l1_fb_name = profile.get("l1_display_name", l1_fb.get("l1_display_name", "L1"))
	lines.append(f"{l1_fb_name.upper()} INTERFERENCE:")
	for pat in l1_fb.get("detected_patterns", []):
	lines.append(f" - {pat.get('pattern', '')}: {pat.get('evidence', '')}")
	lines.append(f" Practice: {pat.get('remediation', '')}")

	return "\n".join(lines).encode("utf-8")