Spaces:

daasime
/

sop-audio-analyzer

Sleeping

App Files Files Community

sop-audio-analyzer / src /reporting.py

daasime

Fix PDF download: convert bytearray to bytes for Streamlit

4d50a3e 14 days ago

raw

history blame contribute delete

6.2 kB

	"""
	PDF Report Generator for Audio Analysis results.
	"""
	from fpdf import FPDF
	from datetime import datetime


	def generate_pdf_report(result) -> bytes:
	    """Generate a PDF report from an AnalysisResult.

	    Sections are written in this order: title header, coloured risk-score
	    badge, audio summary, speaker summary, detection-flags table, alert
	    details (only when at least one alert exists), risk-score breakdown and
	    a footer line.

	    Args:
	        result: Analysis result object. Attributes read here: ``test_id``,
	            ``risk_score``, ``risk_label``, ``filename``,
	            ``duration_seconds``, ``analyzed_at``, ``main_speaker``,
	            ``additional_speakers``, ``playback_detected``,
	            ``playback_score``, ``reading_pattern_detected``,
	            ``reading_confidence``, ``whisper_detected``,
	            ``whisper_instances``, ``suspicious_pauses_detected``,
	            ``suspicious_pauses``, ``longest_pause`` and ``wake_words``.
	            ``main_speaker`` may be falsy and the list-valued attributes may
	            be ``None`` or empty. Entries of ``wake_words``,
	            ``whisper_instances`` and ``suspicious_pauses`` are read with
	            ``.get(...)``, i.e. they are expected to be dict-like.

	    Returns:
	        The rendered PDF document as ``bytes``.
	    """
	    pdf = FPDF()
	    pdf.set_auto_page_break(auto=True, margin=15)
	    pdf.add_page()

	    # Normalise the optional values once so the sections below can count and
	    # iterate them without repeating the ``or []`` guard.
	    main = result.main_speaker
	    wake_words = result.wake_words or []
	    whispers = result.whisper_instances or []
	    pauses = result.suspicious_pauses or []

	    # Header
	    pdf.set_font("Helvetica", "B", 18)
	    _text_line(pdf, "Audio Analysis Report", height=12, align="C")
	    pdf.set_font("Helvetica", "", 10)
	    _text_line(pdf, f"Test ID: {result.test_id}", align="C")
	    # datetime.now() is naive local time of the machine rendering the report.
	    _text_line(pdf, f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}", align="C")
	    pdf.ln(8)

	    # Risk Score
	    score = result.risk_score
	    pdf.set_fill_color(*_risk_rgb(score))
	    pdf.set_text_color(255, 255, 255)  # white digits on the coloured badge
	    pdf.set_font("Helvetica", "B", 28)
	    pdf.cell(40, 18, _latin1_safe(score), fill=True, align="C")
	    # The label cell has no fill, so it must not keep the white badge colour:
	    # switch back to black first or the label is invisible on the page.
	    pdf.set_text_color(0, 0, 0)
	    pdf.set_font("Helvetica", "B", 14)
	    pdf.cell(50, 18, _latin1_safe(f" {result.risk_label} RISK"),
	             new_x="LMARGIN", new_y="NEXT")
	    pdf.ln(6)

	    # Audio Summary
	    _section(pdf, "Audio Summary")
	    pdf.set_font("Helvetica", "", 10)
	    _text_line(pdf, f"File: {result.filename}")
	    _text_line(pdf, f"Duration: {result.duration_seconds:.1f}s")
	    _text_line(pdf, f"Analyzed: {result.analyzed_at}")
	    pdf.ln(4)

	    # Speaker Summary
	    _section(pdf, "Speaker Summary")
	    # Select the body font before either branch: _section leaves the bold
	    # heading font active, and the additional-speaker lines are written even
	    # when there is no main speaker.
	    pdf.set_font("Helvetica", "", 10)
	    if main:
	        _text_line(
	            pdf,
	            f"Main Speaker: {main.voiceprint_id} | Quality: {main.quality} | "
	            f"Synthetic: {main.synthetic_score:.0%} | Seen: {main.times_seen}x",
	        )
	    if result.additional_speakers:
	        for offset, speaker in enumerate(result.additional_speakers):
	            # chr(66) is "B"; presumably "A" is reserved for the main speaker.
	            _text_line(
	                pdf,
	                f"Speaker {chr(66 + offset)}: {speaker.voiceprint_id} | "
	                f"{speaker.total_seconds:.1f}s | Synthetic: {speaker.synthetic_score:.0%}",
	            )
	    else:
	        _text_line(pdf, "No additional speakers detected.")
	    pdf.ln(4)

	    # Detection Flags Table
	    _section(pdf, "Detection Flags")
	    _row(pdf, ["Flag", "Detected", "Score / Detail"], bold=True)
	    synth = main.is_synthetic if main else False
	    synth_s = f"{main.synthetic_score:.0%}" if main else "N/A"
	    _row(pdf, ["Synthetic Voice", "Yes" if synth else "No", synth_s])
	    _row(pdf, ["Playback", "Yes" if result.playback_detected else "No",
	               f"{result.playback_score:.0%}"])
	    _row(pdf, ["Reading Pattern", "Yes" if result.reading_pattern_detected else "No",
	               f"{result.reading_confidence:.0%}"])
	    _row(pdf, ["Whispers", "Yes" if result.whisper_detected else "No",
	               f"{len(whispers)} instances"])
	    _row(pdf, ["Suspicious Pauses", "Yes" if result.suspicious_pauses_detected else "No",
	               f"{len(pauses)} (max {result.longest_pause:.1f}s)"])
	    # Wake words are free text, so make them font-safe before they reach _row.
	    wake_word_list = ", ".join(w.get('word', '') for w in wake_words) or "None"
	    _row(pdf, ["Wake Words", str(len(wake_words)), _latin1_safe(wake_word_list)])
	    pdf.ln(4)

	    # Alert Details (section is omitted entirely when nothing was flagged)
	    if wake_words or whispers or pauses:
	        _section(pdf, "Alert Details")
	        pdf.set_font("Helvetica", "", 9)
	        for ww in wake_words:
	            _text_line(
	                pdf,
	                f" Wake Word: \"{ww.get('word', '')}\" at {ww.get('time', 0):.1f}s "
	                f"(confidence: {ww.get('confidence', 0):.0%})",
	                height=5,
	            )
	        for w in whispers:
	            _text_line(
	                pdf,
	                f" Whisper: {w.get('start', 0):.1f}s - {w.get('end', 0):.1f}s "
	                f"(confidence: {w.get('confidence', 0):.0%})",
	                height=5,
	            )
	        for p in pauses:
	            _text_line(
	                pdf,
	                f" Pause: {p.get('start', 0):.1f}s - {p.get('end', 0):.1f}s "
	                f"({p.get('duration', 0):.1f}s)",
	                height=5,
	            )
	        pdf.ln(4)

	    # Risk Score Breakdown
	    # Each line scales a 0..1 score, or a capped count, to that factor's
	    # maximum number of points.
	    _section(pdf, "Risk Score Breakdown")
	    pdf.set_font("Helvetica", "", 10)
	    if main:
	        _text_line(pdf, f"Synthetic voice: {main.synthetic_score*25:.0f} / 25")
	    _text_line(pdf, f"Playback: {result.playback_score*15:.0f} / 15")
	    _text_line(pdf, f"Reading pattern: {result.reading_confidence*20:.0f} / 20")
	    _text_line(pdf, f"Whispers: {min(len(whispers), 3) / 3 * 15:.0f} / 15")
	    _text_line(pdf, f"Suspicious pauses: {min(len(pauses), 3) / 3 * 10:.0f} / 10")
	    _text_line(pdf, f"Wake words: {min(len(wake_words), 2) / 2 * 10:.0f} / 10")
	    pdf.ln(6)

	    # Footer
	    pdf.set_font("Helvetica", "I", 8)
	    pdf.cell(0, 5, "Generated by Audio Analyzer PoC", align="C")

	    # Convert explicitly so callers always receive immutable bytes, matching
	    # the declared return type.
	    return bytes(pdf.output())


	def _latin1_safe(value) -> str:
	    """Return ``str(value)`` with every non-Latin-1 character replaced by ``?``.

	    The report uses the built-in Helvetica font, which can only encode
	    Latin-1 text; without this a single unsupported character in e.g. a file
	    name aborts the whole report.
	    """
	    return str(value).encode("latin-1", errors="replace").decode("latin-1")


	def _text_line(pdf, text, height=6, **cell_kwargs):
	    """Write ``text`` as one full-width cell, then move to the next line start.

	    Extra keyword arguments (for example ``align="C"``) are passed to
	    ``pdf.cell`` unchanged.
	    """
	    pdf.cell(0, height, _latin1_safe(text), new_x="LMARGIN", new_y="NEXT", **cell_kwargs)


	def _section(pdf, title):
	    """Draw a section heading: bold 12 pt title on a light-grey bar.

	    Args:
	        pdf: Document being written. Must provide ``set_font``,
	            ``set_fill_color``, ``cell`` and ``ln``.
	        title: Heading text; it is printed with one leading space as padding.

	    Note:
	        The bold heading font and the grey fill colour remain selected on
	        ``pdf`` afterwards, so callers must set their own font before writing
	        body text.
	    """
	    pdf.set_font("Helvetica", "B", 12)
	    pdf.set_fill_color(240, 240, 240)
	    pdf.cell(0, 8, f" {title}", fill=True, new_x="LMARGIN", new_y="NEXT")
	    # Small gap between the heading bar and the first body line.
	    pdf.ln(2)


	def _row(pdf, cols, bold=False, widths=(50, 25, 115)):
	    """Draw one bordered table row, one cell per value in ``cols``.

	    Args:
	        pdf: Document being written. Must provide ``set_font``, ``cell`` and
	            ``ln``.
	        cols: Values for the row, left to right; each is converted with
	            ``str``. Fewer values than widths simply yields a shorter row.
	        bold: Use the bold face (for the header row) instead of regular.
	        widths: Cell widths, left to right, in the document's unit. The
	            default is the three-column layout of the detection-flags table.

	    Raises:
	        IndexError: If ``cols`` has more values than ``widths``. This is
	            checked up front so a partially drawn row is never left behind.
	    """
	    cols = list(cols)
	    if len(cols) > len(widths):
	        raise IndexError(
	            f"_row received {len(cols)} columns but only {len(widths)} widths"
	        )
	    pdf.set_font("Helvetica", "B" if bold else "", 9)
	    for width, col in zip(widths, cols):
	        pdf.cell(width, 6, str(col), border=1)
	    # Cells advance to the right; finish the row with a line break.
	    pdf.ln()


	def _risk_rgb(score):
	    """Return the badge colour for a risk score as an ``(r, g, b)`` tuple.

	    Args:
	        score: Numeric risk score.

	    Returns:
	        Green for scores up to and including 30, amber for scores up to and
	        including 60, red for anything higher.
	    """
	    if score <= 30:
	        return (34, 197, 94)  # green
	    if score <= 60:
	        return (234, 179, 8)  # amber
	    return (239, 68, 68)  # red