"""CONTRASTIVE COMPARISON ENGINE

Compares two voice profiles (L1 vs L2, pre vs post, speaker A vs B)
and produces an interference/difference report.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any

import numpy as np


@dataclass
class DimensionDiff:
    """Difference between sample A and sample B along one dimension."""

    dimension: str
    sample_a_value: float
    sample_b_value: float
    absolute_diff: float
    percent_diff: float
    significance: str  # "low", "moderate", "high", "critical"


@dataclass
class ContrastiveReport:
    """Aggregated result of comparing two voice profiles."""

    sample_a_id: str
    sample_b_id: str
    phoneme_interference_score: float  # 0-100
    prosodic_divergence_score: float  # 0-100
    voice_quality_similarity: float  # 0-1
    fluency_gap: float  # 0-100
    rhythm_class_match: bool
    formant_differences: list[DimensionDiff]
    pitch_differences: list[DimensionDiff]
    quality_differences: list[DimensionDiff]
    overall_contrastive_score: float  # 0-100, higher = more different
    key_interference_patterns: list[str]
    recommendations: list[str]


def _diff(name: str, a: float, b: float) -> DimensionDiff:
    """Build a ``DimensionDiff`` for one dimension.

    The percent difference is measured relative to ``a``. When ``a`` is
    zero the magnitude of ``b`` is used as the baseline instead, so a value
    that exists in only one sample is reported as a 100% difference rather
    than being silently treated as "no difference". Two zeros give 0%.

    Args:
        name: Label stored in ``DimensionDiff.dimension``.
        a: Value from sample A (the reference).
        b: Value from sample B.

    Returns:
        A ``DimensionDiff`` with values rounded to two decimals and a
        significance of "critical" (>50%), "high" (>25%), "moderate" (>10%)
        or "low".
    """
    abs_d = abs(a - b)
    # Fall back to |b| so a zero reference does not hide a real difference.
    baseline = abs(a) if a != 0 else abs(b)
    pct = (abs_d / baseline * 100) if baseline else 0.0

    if pct > 50:
        sig = "critical"
    elif pct > 25:
        sig = "high"
    elif pct > 10:
        sig = "moderate"
    else:
        sig = "low"

    return DimensionDiff(
        dimension=name,
        sample_a_value=round(a, 2),
        sample_b_value=round(b, 2),
        absolute_diff=round(abs_d, 2),
        percent_diff=round(pct, 2),
        significance=sig,
    )


def compare_profiles(
    profile_a: dict[str, Any],
    profile_b: dict[str, Any],
    sample_a_id: str = "sample_a",
    sample_b_id: str = "sample_b",
) -> ContrastiveReport:
    """Compare two full voice profiles contrastively.

    The profiles are nested dicts. The sections read here are
    ``phoneme_analysis``, ``prosodic_profile``, ``voice_quality`` and
    ``connected_speech``. Missing sections or keys fall back to defaults:
    0 for measurements, 50 for the interference and fluency scores, and an
    empty string for the rhythm class.

    Args:
        profile_a: Profile of the first (reference) sample.
        profile_b: Profile of the second sample.
        sample_a_id: Identifier copied into the report for sample A.
        sample_b_id: Identifier copied into the report for sample B.

    Returns:
        A ``ContrastiveReport`` holding per-dimension differences, composite
        scores, the notable ("high"/"critical") patterns and recommendations.
    """
    # Extract sub-profiles
    pa_phon = profile_a.get("phoneme_analysis", {})
    pb_phon = profile_b.get("phoneme_analysis", {})
    pa_formants = pa_phon.get("formant_means", {})
    pb_formants = pb_phon.get("formant_means", {})

    pa_pros = profile_a.get("prosodic_profile", {})
    pb_pros = profile_b.get("prosodic_profile", {})
    pa_rhythm = pa_pros.get("rhythm", {})
    pb_rhythm = pb_pros.get("rhythm", {})
    pa_rate = pa_pros.get("speech_rate_syl_per_sec", 0)
    pb_rate = pb_pros.get("speech_rate_syl_per_sec", 0)

    pa_vq = profile_a.get("voice_quality", {})
    pb_vq = profile_b.get("voice_quality", {})
    pa_breath = pa_vq.get("breathiness", {})
    pb_breath = pb_vq.get("breathiness", {})
    pa_creak = pa_vq.get("creakiness", {})
    pb_creak = pb_vq.get("creakiness", {})

    # Formant differences (F1-F4)
    formant_diffs = [
        _diff(f"F{i}", pa_formants.get(f"f{i}", 0), pb_formants.get(f"f{i}", 0))
        for i in range(1, 5)
    ]

    # Prosodic differences (reported in the ``pitch_differences`` field)
    pitch_diffs = [
        _diff("speech_rate", pa_rate, pb_rate),
        _diff("nPVI_vocalic", pa_rhythm.get("npvi_v", 0), pb_rhythm.get("npvi_v", 0)),
        _diff("percent_V", pa_rhythm.get("percent_v", 0), pb_rhythm.get("percent_v", 0)),
        _diff(
            "prosodic_score",
            pa_pros.get("prosodic_score", 0),
            pb_pros.get("prosodic_score", 0),
        ),
    ]

    # Voice quality differences
    quality_diffs = [
        _diff("HNR", pa_breath.get("hnr", 0), pb_breath.get("hnr", 0)),
        _diff("CPP", pa_breath.get("cpp", 0), pb_breath.get("cpp", 0)),
        _diff(
            "breathiness_index",
            pa_breath.get("breathiness_index", 0),
            pb_breath.get("breathiness_index", 0),
        ),
        _diff(
            "creak_index",
            pa_creak.get("creak_index", 0),
            pb_creak.get("creak_index", 0),
        ),
        _diff(
            "jitter",
            pa_creak.get("jitter_local", 0),
            pb_creak.get("jitter_local", 0),
        ),
        _diff(
            "shimmer",
            pa_creak.get("shimmer_local", 0),
            pb_creak.get("shimmer_local", 0),
        ),
    ]

    all_diffs = formant_diffs + pitch_diffs + quality_diffs

    # Composite scores
    phoneme_interference = abs(
        pa_phon.get("interference_score", 50) - pb_phon.get("interference_score", 50)
    )

    # Percent diffs are unbounded, so clamp to keep the score within 0-100,
    # the same way the overall score is clamped below.
    pitch_pcts = [d.percent_diff for d in pitch_diffs]
    prosodic_divergence = min(100.0, float(np.mean(pitch_pcts))) if pitch_pcts else 0.0

    vq_pcts = [d.percent_diff for d in quality_diffs]
    vq_similarity = max(0.0, 1.0 - float(np.mean(vq_pcts)) / 100) if vq_pcts else 0.5

    fluency_a = profile_a.get("connected_speech", {}).get("fluency_score", 50)
    fluency_b = profile_b.get("connected_speech", {}).get("fluency_score", 50)
    fluency_gap = abs(fluency_a - fluency_b)

    rhythm_match = pa_rhythm.get("rhythm_class", "") == pb_rhythm.get("rhythm_class", "")

    # Overall contrastive score
    all_pcts = [d.percent_diff for d in all_diffs]
    overall = min(100.0, float(np.mean(all_pcts))) if all_pcts else 50.0

    # Key patterns: every dimension rated "high" or "critical"
    notable_levels = ("high", "critical")
    patterns: list[str] = [
        f"{d.dimension}: {d.significance} difference ({d.percent_diff:.1f}%)"
        for d in all_diffs
        if d.significance in notable_levels
    ]
    if not rhythm_match:
        patterns.append(
            f"Rhythm class mismatch: {pa_rhythm.get('rhythm_class', '?')} "
            f"vs {pb_rhythm.get('rhythm_class', '?')}"
        )

    # Recommendations
    recs: list[str] = []
    if any(d.significance in notable_levels for d in formant_diffs):
        recs.append("Focus on vowel production — significant formant deviations detected")
    if not rhythm_match:
        recs.append("Work on rhythm patterns — L1 rhythm type is transferring to L2")
    if fluency_gap > 30:
        recs.append("Connected speech practice needed — large fluency gap between samples")
    if any(
        d.dimension == "jitter" and d.significance in notable_levels
        for d in quality_diffs
    ):
        recs.append("Monitor voice quality — elevated perturbation measures")

    return ContrastiveReport(
        sample_a_id=sample_a_id,
        sample_b_id=sample_b_id,
        phoneme_interference_score=round(phoneme_interference, 2),
        prosodic_divergence_score=round(float(prosodic_divergence), 2),
        voice_quality_similarity=round(float(vq_similarity), 4),
        fluency_gap=round(fluency_gap, 2),
        rhythm_class_match=rhythm_match,
        formant_differences=formant_diffs,
        pitch_differences=pitch_diffs,
        quality_differences=quality_diffs,
        overall_contrastive_score=round(overall, 2),
        key_interference_patterns=patterns,
        recommendations=recs,
    )