Spaces:
Build error
Build error
| """ELAN Annotation Format (.eaf) Export | |
| Generates standard ELAN XML files from voice profile analysis results. | |
| Produces a multi-tier annotation corpus suitable for: | |
| - PhD linguistic research | |
| - Corpus-based phonological studies | |
| - Cross-speaker contrastive analysis | |
| - Longitudinal acquisition tracking | |
| ELAN spec: https://archive.mpi.nl/tla/elan/documentation | |
| Tiers generated: | |
| 1. Transcription — Word-level transcript with timestamps | |
| 2. Phonemes — Phone-level segmentation (from forced alignment or Wav2Vec) | |
| 3. Prosody — Intonation patterns, stress, rhythm annotations | |
| 4. L1_Interference — Detected L1 transfer patterns with severity | |
| 5. Voice_Quality — Phonation type, breathiness, creak annotations | |
| 6. Cognitive_Load — Filled pauses, hesitations, self-corrections | |
| 7. Connected_Speech — Assimilation, elision, linking events | |
| 8. CIF_Score — Overall Contrastive Interference Index per segment | |
| 9. Emotion — Emotional valence/arousal labels | |
| 10. Speaker_Metadata — Speaker ID, language, session info | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| import time | |
| import uuid | |
| from pathlib import Path | |
| from typing import Any | |
| from xml.etree.ElementTree import Element, SubElement, tostring | |
| from xml.dom import minidom | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# ELAN namespace and schema
# ELAN_SCHEMA is written to the root element's xsi:noNamespaceSchemaLocation.
ELAN_SCHEMA = "http://www.mpi.nl/tools/elan/EAFv3.0.xsd"
# ELAN_FORMAT is written to both the FORMAT and VERSION root attributes.
ELAN_FORMAT = "3.0"
| def _ts_id(counter: list[int]) -> str: | |
| """Generate a unique time slot ID.""" | |
| counter[0] += 1 | |
| return f"ts{counter[0]}" | |
| def _ann_id(counter: list[int]) -> str: | |
| """Generate a unique annotation ID.""" | |
| counter[0] += 1 | |
| return f"a{counter[0]}" | |
| def _ms_to_elan(ms: float) -> int: | |
| """Convert milliseconds (float) to ELAN time value (int ms).""" | |
| return int(round(ms)) | |
def generate_eaf(
    profile: dict[str, Any],
    audio_path: Path | str,
    speaker_id: str = "anonymous",
    student_name: str = "Student",
    language: str = "en",
    l1_language: str = "bho",
    session_id: str | None = None,
) -> str:
    """Generate a complete ELAN .eaf XML document from analysis results.

    The document is assembled in this order: root element, HEADER (media
    descriptor plus PROPERTY entries), TIME_ORDER, one TIER per tier builder,
    the LINGUISTIC_TYPE declarations and the CONSTRAINT stereotypes.

    Args:
        profile: Full pipeline output dict (all 10 layers).
        audio_path: Path to the source audio file.
        speaker_id: Unique speaker identifier.
        student_name: Display name.
        language: Target language code.
        l1_language: L1 language code.
        session_id: Optional session identifier. When falsy, an ID of the
            form ``session_<unix time>`` is generated.

    Returns:
        EAF XML as a formatted string.
    """
    audio_path = Path(audio_path)
    session_id = session_id or f"session_{int(time.time())}"
    # One-element lists act as mutable counters shared with the tier builders.
    ts_counter = [0]
    ann_counter = [0]

    # --- Root element ---
    root = Element("ANNOTATION_DOCUMENT")
    root.set("AUTHOR", "Contrastive Voice Profiling Engine")
    # The format string hard-codes a +00:00 offset, so the clock value must be
    # UTC (gmtime) rather than the local time strftime uses by default.
    root.set("DATE", time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime()))
    root.set("FORMAT", ELAN_FORMAT)
    root.set("VERSION", ELAN_FORMAT)
    root.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
    root.set("xsi:noNamespaceSchemaLocation", ELAN_SCHEMA)

    # --- Header ---
    header = SubElement(root, "HEADER")
    header.set("MEDIA_FILE", "")
    header.set("TIME_UNITS", "milliseconds")
    media = SubElement(header, "MEDIA_DESCRIPTOR")
    # as_uri() yields a well-formed, percent-encoded file:// URL on every
    # platform; manual concatenation produced "file:////..." on POSIX.
    media.set("MEDIA_URL", audio_path.resolve().as_uri())
    # NOTE(review): the MIME type is fixed to WAV regardless of the file
    # extension - confirm that only WAV input reaches this function.
    media.set("MIME_TYPE", "audio/x-wav")
    media.set("RELATIVE_MEDIA_URL", f"./{audio_path.name}")

    # Properties
    for key, val in [
        ("speaker_id", speaker_id),
        ("student_name", student_name),
        ("l1_language", l1_language),
        ("target_language", language),
        ("session_id", session_id),
        ("generator", "contrastive-voice-profiling-engine"),
    ]:
        prop = SubElement(header, "PROPERTY")
        prop.set("NAME", key)
        prop.text = str(val)

    # --- Collect all time points ---
    time_slots: list[tuple[str, int]] = []

    def _add_ts(ms: float) -> str:
        """Register a time point and return the ID of its new time slot."""
        ts_id = _ts_id(ts_counter)
        time_slots.append((ts_id, _ms_to_elan(ms)))
        return ts_id

    # --- Build tiers ---
    # Each builder appends one tier dict to tiers_data and registers the time
    # slots it needs through _add_ts.
    tiers_data: list[dict] = []
    # Tier 1: Transcription (word-level)
    _build_transcription_tier(profile, tiers_data, _add_ts, ann_counter)
    # Tier 2: Phonemes
    _build_phoneme_tier(profile, tiers_data, _add_ts, ann_counter)
    # Tier 3: Prosody
    _build_prosody_tier(profile, tiers_data, _add_ts, ann_counter)
    # Tier 4: L1 Interference
    _build_l1_tier(profile, tiers_data, _add_ts, ann_counter, l1_language)
    # Tier 5: Voice Quality
    _build_voice_quality_tier(profile, tiers_data, _add_ts, ann_counter)
    # Tier 6: Cognitive Load
    _build_cognitive_load_tier(profile, tiers_data, _add_ts, ann_counter)
    # Tier 7: Connected Speech
    _build_connected_speech_tier(profile, tiers_data, _add_ts, ann_counter)
    # Tier 8: CIF Score
    _build_cif_tier(profile, tiers_data, _add_ts, ann_counter)
    # Tier 9: Emotion
    _build_emotion_tier(profile, tiers_data, _add_ts, ann_counter)
    # Tier 10: Metadata (single span)
    _build_metadata_tier(
        profile, tiers_data, _add_ts, ann_counter,
        speaker_id, student_name, language, l1_language, session_id,
    )

    # --- Write TIME_ORDER ---
    time_order = SubElement(root, "TIME_ORDER")
    # Sort by time value for valid EAF
    time_slots.sort(key=lambda x: x[1])
    for ts_id, ts_val in time_slots:
        ts_el = SubElement(time_order, "TIME_SLOT")
        ts_el.set("TIME_SLOT_ID", ts_id)
        ts_el.set("TIME_VALUE", str(ts_val))

    # --- Write TIERs ---
    for tier_info in tiers_data:
        tier_el = SubElement(root, "TIER")
        tier_el.set("LINGUISTIC_TYPE_REF", tier_info.get("type_ref", "default-lt"))
        tier_el.set("TIER_ID", tier_info["tier_id"])
        if "participant" in tier_info:
            tier_el.set("PARTICIPANT", tier_info["participant"])
        if "annotator" in tier_info:
            tier_el.set("ANNOTATOR", tier_info["annotator"])
        for ann in tier_info.get("annotations", []):
            ann_el = SubElement(tier_el, "ANNOTATION")
            align_ann = SubElement(ann_el, "ALIGNABLE_ANNOTATION")
            align_ann.set("ANNOTATION_ID", ann["id"])
            align_ann.set("TIME_SLOT_REF1", ann["ts1"])
            align_ann.set("TIME_SLOT_REF2", ann["ts2"])
            value_el = SubElement(align_ann, "ANNOTATION_VALUE")
            value_el.text = ann["value"]

    # --- Linguistic Types ---
    # Every type_ref used by the tier builders must be declared here.
    ling_types = [
        "default-lt", "phoneme-lt", "prosody-lt", "interference-lt",
        "voice-quality-lt", "cognitive-lt", "connected-speech-lt",
        "cif-lt", "emotion-lt", "metadata-lt",
    ]
    for lt in ling_types:
        lt_el = SubElement(root, "LINGUISTIC_TYPE")
        lt_el.set("GRAPHIC_REFERENCES", "false")
        lt_el.set("LINGUISTIC_TYPE_ID", lt)
        lt_el.set("TIME_ALIGNABLE", "true")

    # --- Constraints ---
    for stereo, desc in [
        ("Time_Subdivision", "Time subdivision of parent annotation's time interval"),
        ("Symbolic_Subdivision", "Symbolic subdivision of parent annotation's time interval"),
        ("Symbolic_Association", "1-1 association with a parent annotation"),
        ("Included_In", "Time included in parent annotation's time interval"),
    ]:
        con = SubElement(root, "CONSTRAINT")
        con.set("DESCRIPTION", desc)
        con.set("STEREOTYPE", stereo)

    # --- Format and return ---
    # Round-trip through minidom purely to get indented output.
    raw_xml = tostring(root, encoding="unicode")
    parsed = minidom.parseString(raw_xml)
    return parsed.toprettyxml(indent=" ", encoding=None)
| # ── Tier Builders ────────────────────────────────────────────────────────── | |
def _word_time_scale(words, duration_seconds):
    """Return the factor (1000 or 1) that converts word timestamps to ms.

    The unit is decided once for the whole word list, so the start and end of
    a word can never be scaled differently.
    """
    latest = max(
        ((w.get(key) or 0) for w in words for key in ("start", "end")),
        default=0,
    )
    if duration_seconds:
        # With a known clip length, timestamps given in seconds cannot lie far
        # beyond it, whereas millisecond values exceed it by orders of magnitude.
        return 1000 if latest <= duration_seconds * 1.5 else 1
    # No clip length available: fall back to the "< 100 means seconds" rule,
    # applied to the largest timestamp instead of to each field separately.
    return 1000 if latest < 100 else 1


def _build_transcription_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 1: Word-level transcription with timestamps.

    Reads ``profile["transcription"]``. Entries of ``word_timestamps`` with a
    non-empty ``word`` become one annotation each. When that yields nothing,
    ``segments`` are used instead, with their ``start``/``end`` taken as
    seconds.

    Word timestamps may arrive in seconds or in milliseconds. The unit is
    detected once per word list (see ``_word_time_scale``), using
    ``duration_seconds`` when the transcription provides it. Missing or
    ``None`` timestamps count as 0.

    Args:
        profile: Pipeline output dict.
        tiers: List the finished tier dict is appended to.
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
    """
    trans = profile.get("transcription", {})
    words = [w for w in trans.get("word_timestamps", []) if w.get("word")]
    scale = _word_time_scale(words, trans.get("duration_seconds"))

    annotations = []
    for w in words:
        ts1 = add_ts((w.get("start") or 0) * scale)
        ts2 = add_ts((w.get("end") or 0) * scale)
        annotations.append({
            "id": _ann_id(ann_ctr),
            "ts1": ts1, "ts2": ts2,
            "value": w["word"],
        })

    # If no word timestamps, use segment-level
    if not annotations:
        for seg in trans.get("segments", []):
            ts1 = add_ts(seg["start"] * 1000)
            ts2 = add_ts(seg["end"] * 1000)
            annotations.append({
                "id": _ann_id(ann_ctr),
                "ts1": ts1, "ts2": ts2,
                "value": seg.get("text", "").strip(),
            })

    tiers.append({
        "tier_id": "Transcription",
        "type_ref": "default-lt",
        "participant": "Speaker",
        "annotator": "whisper",
        "annotations": annotations,
    })
def _build_phoneme_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 2: Phone-level segmentation.

    Uses ``profile["forced_alignment"]["phones"]`` when present and otherwise
    falls back to ``profile["phoneme_analysis"]["phoneme_details"]`` (Wav2Vec
    phoneme spans). The tier's annotator and the default ``source`` shown in
    each label follow whichever data set was actually used, so fallback output
    is no longer attributed to forced alignment (and vice versa).

    Each entry needs a ``phone`` or ``phoneme`` label; entries without one are
    skipped. ``end_ms`` defaults to ``start_ms + duration_ms`` (50 ms when no
    duration is given). A truthy ``confidence`` is appended to the label as
    ``[source:confidence]``.

    Args:
        profile: Pipeline output dict.
        tiers: List the finished tier dict is appended to.
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
    """
    # Try forced alignment first, fall back to phoneme_analysis
    phones = profile.get("forced_alignment", {}).get("phones", [])
    provenance = "forced_alignment"
    if not phones:
        # Fall back to Wav2Vec phoneme spans
        phones = profile.get("phoneme_analysis", {}).get("phoneme_details", [])
        if phones:
            provenance = "wav2vec"

    annotations = []
    for p in phones:
        phone = p.get("phone") or p.get("phoneme", "")
        start = p.get("start_ms", 0)
        end = p.get("end_ms", start + p.get("duration_ms", 50))
        if not phone:
            continue
        ts1 = add_ts(start)
        ts2 = add_ts(end)
        conf = p.get("confidence", 0)
        # An explicit per-phone source wins; otherwise report the real origin.
        source = p.get("source", provenance)
        label = f"{phone} [{source}:{conf:.2f}]" if conf else phone
        annotations.append({
            "id": _ann_id(ann_ctr),
            "ts1": ts1, "ts2": ts2,
            "value": label,
        })

    tiers.append({
        "tier_id": "Phonemes",
        "type_ref": "phoneme-lt",
        "annotator": provenance,
        "annotations": annotations,
    })
def _build_prosody_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 3: Prosodic annotations (intonation, stress, rhythm).

    Reads ``profile["prosodic_profile"]``. Intonation and rhythm each become
    one annotation spanning the whole utterance (0 to ``duration_seconds`` of
    the transcription, default 5 s). Every entry of ``stress_patterns`` flagged
    ``stressed`` becomes its own annotation.

    Stress times below 100 are taken to be seconds and converted to ms. A
    missing ``end`` defaults to 200 ms after the start; the default is applied
    after the unit conversion so it is never multiplied by 1000 itself.

    Args:
        profile: Pipeline output dict.
        tiers: List the finished tier dict is appended to.
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
    """
    prosody = profile.get("prosodic_profile", {})
    annotations = []

    def _annotate_utterance(value):
        """Append an annotation covering the whole utterance."""
        duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
        ts1 = add_ts(0)
        ts2 = add_ts(duration_ms)
        annotations.append({
            "id": _ann_id(ann_ctr),
            "ts1": ts1, "ts2": ts2,
            "value": value,
        })

    # Intonation pattern annotations
    intonation = prosody.get("intonation", {})
    if intonation:
        pattern = intonation.get("pattern", "unknown")
        boundary_tones = intonation.get("boundary_tones", [])
        _annotate_utterance(
            f"Intonation: {pattern} | Tones: {', '.join(boundary_tones) if boundary_tones else 'N/A'}"
        )

    # Rhythm classification
    rhythm = prosody.get("rhythm", {})
    if rhythm:
        rhythm_class = rhythm.get("rhythm_class", "unknown")
        npvi = rhythm.get("nPVI_V", 0)
        _annotate_utterance(f"Rhythm: {rhythm_class} (nPVI={npvi:.1f})")

    # Stressed words
    for s in prosody.get("stress_patterns", []):
        if not s.get("stressed"):
            continue
        start = s.get("start", 0)
        end = s.get("end")
        # Convert seconds to ms if needed; the decision is made on the start
        # value and applied to both ends of the span.
        if start < 100:
            start *= 1000
            if end is not None:
                end *= 1000
        if end is None:
            # Default span of 200 ms, added only once start is in ms.
            end = start + 200
        ts1 = add_ts(start)
        ts2 = add_ts(end)
        annotations.append({
            "id": _ann_id(ann_ctr),
            "ts1": ts1, "ts2": ts2,
            "value": f"STRESS: {s.get('word', '')}",
        })

    tiers.append({
        "tier_id": "Prosody",
        "type_ref": "prosody-lt",
        "annotator": "prosodic_profiling",
        "annotations": annotations,
    })
def _build_l1_tier(profile, tiers, add_ts, ann_ctr, l1_language):
    """Tier 4: L1 interference patterns.

    Emits one utterance-wide summary annotation (L1 display name and overall
    interference score), followed by one utterance-wide annotation per entry
    of ``profile["l1_interference"]["detected_patterns"]``.

    Args:
        profile: Pipeline output dict.
        tiers: List the finished tier dict is appended to.
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
        l1_language: Fallback label when the profile has no ``l1_display_name``.
    """
    l1_data = profile.get("l1_interference", {})
    entries = []
    total_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

    def _whole_clip():
        """Allocate a start/end time slot pair covering the full utterance."""
        return add_ts(0), add_ts(total_ms)

    # Summary line for the whole utterance
    score = l1_data.get("interference_score", 0)
    shown_name = l1_data.get("l1_display_name", l1_language)
    first, last = _whole_clip()
    entries.append({
        "id": _ann_id(ann_ctr),
        "ts1": first, "ts2": last,
        "value": f"L1={shown_name} | Interference={score}/100",
    })

    # One annotation per detected pattern
    for item in l1_data.get("detected_patterns", []):
        pattern_name = item.get("pattern") or item.get("name", "unknown")
        level = item.get("severity", "low")
        proof = item.get("evidence", "")
        advice = item.get("remediation", "")
        first, last = _whole_clip()
        pieces = [f"[{level.upper()}] {pattern_name}"]
        if proof:
            pieces.append(f"{proof}")
        if advice:
            pieces.append(f"FIX: {advice}")
        entries.append({
            "id": _ann_id(ann_ctr),
            "ts1": first, "ts2": last,
            "value": " | ".join(pieces),
        })

    tiers.append({
        "tier_id": "L1_Interference",
        "type_ref": "interference-lt",
        "annotator": "l1_targets",
        "annotations": entries,
    })
def _build_voice_quality_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 5: Voice quality annotations.

    Always emits a phonation/register annotation. A VoiceSauce measures
    annotation is added when ``profile["voicesauce"]`` is non-empty, and a
    nasality annotation when ``profile["voice_quality"]["nasality"]`` is
    non-empty. All annotations span the whole utterance.

    Args:
        profile: Pipeline output dict.
        tiers: List the finished tier dict is appended to.
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
    """
    quality = profile.get("voice_quality", {})
    sauce = profile.get("voicesauce", {})
    entries = []
    total_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

    def _whole_clip():
        """Allocate a start/end time slot pair covering the full utterance."""
        return add_ts(0), add_ts(total_ms)

    # Phonation type: the VoiceSauce label wins, otherwise the breathiness
    # classification (default "modal") is used.
    phonation = sauce.get("phonation_type")
    if not phonation:
        phonation = quality.get("breathiness", {}).get("classification", "modal")
    register = quality.get("register", {}).get("type", "unknown")
    first, last = _whole_clip()
    entries.append({
        "id": _ann_id(ann_ctr),
        "ts1": first, "ts2": last,
        "value": f"Phonation: {phonation} | Register: {register}",
    })

    # VoiceSauce measures
    if sauce:
        h1h2_stats = sauce.get("H1_H2", {})
        cpp_stats = sauce.get("CPP", {})
        shr_stats = sauce.get("SHR", {})
        first, last = _whole_clip()
        entries.append({
            "id": _ann_id(ann_ctr),
            "ts1": first, "ts2": last,
            "value": (
                f"H1-H2={h1h2_stats.get('mean', 0):.1f}dB | "
                f"CPP={cpp_stats.get('mean', 0):.1f}dB | "
                f"SHR={shr_stats.get('mean', 0):.3f} | "
                f"Breathiness={sauce.get('breathiness_index', 0):.2f} | "
                f"Creak={sauce.get('creak_index', 0):.2f}"
            ),
        })

    # Nasality
    nasality = quality.get("nasality", {})
    if nasality:
        first, last = _whole_clip()
        entries.append({
            "id": _ann_id(ann_ctr),
            "ts1": first, "ts2": last,
            "value": f"Nasality index: {nasality.get('nasality_index', 0):.2f}",
        })

    tiers.append({
        "tier_id": "Voice_Quality",
        "type_ref": "voice-quality-lt",
        "annotator": "voice_quality+voicesauce",
        "annotations": entries,
    })
def _build_cognitive_load_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 6: Cognitive load markers (filled pauses, hesitations).

    Reads ``profile["morpheme_boundary"]["cognitive_load"]``. String indicators
    become utterance-wide annotations. Dict indicators are placed at their
    ``start_ms``/``end_ms`` (end defaults to 200 ms after the start).
    Indicators of any other type are ignored. A positive ``score`` adds a
    final utterance-wide annotation.

    The separator between an indicator's type and description is an em dash;
    the literal had been corrupted to a stray Greek letter.

    Args:
        profile: Pipeline output dict.
        tiers: List the finished tier dict is appended to.
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
    """
    mb = profile.get("morpheme_boundary", {})
    cog = mb.get("cognitive_load", {})
    annotations = []

    # Filled pauses
    indicators = cog.get("indicators", [])
    for ind in indicators:
        if isinstance(ind, str):
            # Simple string indicator - create utterance-level annotation
            duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
            ts1 = add_ts(0)
            ts2 = add_ts(duration_ms)
            annotations.append({
                "id": _ann_id(ann_ctr),
                "ts1": ts1, "ts2": ts2,
                "value": f"COGNITIVE: {ind}",
            })
        elif isinstance(ind, dict):
            start = ind.get("start_ms", 0)
            end = ind.get("end_ms", start + 200)
            ts1 = add_ts(start)
            ts2 = add_ts(end)
            annotations.append({
                "id": _ann_id(ann_ctr),
                "ts1": ts1, "ts2": ts2,
                "value": f"COGNITIVE: {ind.get('type', 'marker')} \u2014 {ind.get('description', '')}",
            })

    # Overall score
    score = cog.get("score", 0)
    if score > 0:
        duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
        ts1 = add_ts(0)
        ts2 = add_ts(duration_ms)
        annotations.append({
            "id": _ann_id(ann_ctr),
            "ts1": ts1, "ts2": ts2,
            "value": f"Cognitive Load Score: {score}/100",
        })

    tiers.append({
        "tier_id": "Cognitive_Load",
        "type_ref": "cognitive-lt",
        "annotator": "morpheme_boundary",
        "annotations": annotations,
    })
def _build_connected_speech_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 7: Connected speech processes (assimilation, elision, linking).

    For each of the ``assimilations``, ``elisions``, ``linkings`` and
    ``reductions`` lists in ``profile["connected_speech"]``, every dict event
    becomes one annotation; non-dict events are skipped. An event starts at
    ``start_ms`` (or ``position_ms``, else 0) and ends at ``end_ms`` (default
    150 ms later). A positive ``fluency_score`` adds an utterance-wide
    annotation at the end.

    Args:
        profile: Pipeline output dict.
        tiers: List the finished tier dict is appended to.
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
    """
    speech = profile.get("connected_speech", {})
    entries = []

    for group in ("assimilations", "elisions", "linkings", "reductions"):
        # Singular forms of the group name: lower case as the fallback label,
        # upper case as the bracketed tag.
        fallback_label = group.rstrip("s")
        tag = group.upper().rstrip('S')
        for event in speech.get(group, []):
            if not isinstance(event, dict):
                continue
            begin = event.get("start_ms", event.get("position_ms", 0))
            finish = event.get("end_ms", begin + 150)
            label = event.get("label") or event.get("type", fallback_label)
            context = event.get("context", "")
            first = add_ts(begin)
            last = add_ts(finish)
            text = f"[{tag}] {label}"
            if context:
                text = f"{text} | {context}"
            entries.append({
                "id": _ann_id(ann_ctr),
                "ts1": first, "ts2": last,
                "value": text,
            })

    # Fluency score
    fluency = speech.get("fluency_score", 0)
    if fluency > 0:
        total_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
        first = add_ts(0)
        last = add_ts(total_ms)
        entries.append({
            "id": _ann_id(ann_ctr),
            "ts1": first, "ts2": last,
            "value": f"Fluency Score: {fluency}/100",
        })

    tiers.append({
        "tier_id": "Connected_Speech",
        "type_ref": "connected-speech-lt",
        "annotator": "connected_speech",
        "annotations": entries,
    })
def _build_cif_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 8: CIF (Contrastive Interference Field) scores.

    Emits the overall CII with its severity label, then one annotation per
    dict-valued entry of ``profile["cif_analysis"]["dimensions"]``. All
    annotations span the whole utterance.

    Args:
        profile: Pipeline output dict.
        tiers: List the finished tier dict is appended to.
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
    """
    cif_data = profile.get("cif_analysis", {})
    entries = []
    total_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

    def _whole_clip():
        """Allocate a start/end time slot pair covering the full utterance."""
        return add_ts(0), add_ts(total_ms)

    # Overall index
    cii_total = cif_data.get("overall_cii", 0)
    level = cif_data.get("overall_severity", "unknown")
    first, last = _whole_clip()
    entries.append({
        "id": _ann_id(ann_ctr),
        "ts1": first, "ts2": last,
        "value": f"CII={cii_total:.3f} [{level}]",
    })

    # Per-dimension CII
    for label, details in cif_data.get("dimensions", {}).items():
        if not isinstance(details, dict):
            continue
        cii_value = details.get("cii", 0)
        first, last = _whole_clip()
        entries.append({
            "id": _ann_id(ann_ctr),
            "ts1": first, "ts2": last,
            "value": f"CIF-{label}: {cii_value:.3f}",
        })

    tiers.append({
        "tier_id": "CIF_Score",
        "type_ref": "cif-lt",
        "annotator": "cif_model",
        "annotations": entries,
    })
def _build_emotion_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 9: Emotional analysis annotations.

    Adds a SpeechBrain annotation when
    ``profile["ai_classification"]["speechbrain"]["emotion"]`` is non-empty,
    and an auDeep annotation when ``profile["audeep"]`` is non-empty. Both
    span the whole utterance.

    Args:
        profile: Pipeline output dict.
        tiers: List the finished tier dict is appended to.
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
    """
    entries = []
    total_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

    def _whole_clip():
        """Allocate a start/end time slot pair covering the full utterance."""
        return add_ts(0), add_ts(total_ms)

    # SpeechBrain emotion
    speechbrain = profile.get("ai_classification", {}).get("speechbrain", {})
    sb_emotion = speechbrain.get("emotion", {}) if speechbrain else {}
    if sb_emotion:
        sb_label = sb_emotion.get("label", "unknown")
        sb_conf = sb_emotion.get("confidence", 0)
        first, last = _whole_clip()
        entries.append({
            "id": _ann_id(ann_ctr),
            "ts1": first, "ts2": last,
            "value": f"SpeechBrain: {sb_label} ({sb_conf:.2f})",
        })

    # auDeep emotion
    audeep = profile.get("audeep", {})
    if audeep:
        primary = audeep.get("primary_emotion", "neutral")
        certainty = audeep.get("emotion_confidence", 0)
        valence = audeep.get("valence", 0)
        arousal = audeep.get("arousal", 0)
        dominance = audeep.get("dominance", 0)
        first, last = _whole_clip()
        entries.append({
            "id": _ann_id(ann_ctr),
            "ts1": first, "ts2": last,
            "value": f"auDeep: {primary} ({certainty:.2f}) | V={valence:.2f} A={arousal:.2f} D={dominance:.2f}",
        })

    tiers.append({
        "tier_id": "Emotion",
        "type_ref": "emotion-lt",
        "annotator": "speechbrain+audeep",
        "annotations": entries,
    })
def _build_metadata_tier(profile, tiers, add_ts, ann_ctr,
                         speaker_id, student_name, language, l1_language, session_id):
    """Tier 10: Session metadata.

    Emits a single utterance-wide annotation summarising the speaker, the
    L1 and L2 language pair, the session ID and
    ``profile["processing_time_ms"]``. The tier's participant is the speaker
    ID.

    The separator between L1 and L2 is a right arrow; the literal had been
    corrupted to a stray Greek letter.

    Args:
        profile: Pipeline output dict.
        tiers: List the finished tier dict is appended to.
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
        speaker_id: Speaker identifier.
        student_name: Display name shown next to the speaker ID.
        language: Target (L2) language code.
        l1_language: L1 language code.
        session_id: Session identifier.
    """
    duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
    processing_ms = profile.get("processing_time_ms", 0)
    ts1 = add_ts(0)
    ts2 = add_ts(duration_ms)
    meta_str = (
        f"Speaker: {speaker_id} ({student_name}) | "
        f"L1: {l1_language} \u2192 L2: {language} | "
        f"Session: {session_id} | "
        f"Processing: {processing_ms:.0f}ms"
    )
    tiers.append({
        "tier_id": "Speaker_Metadata",
        "type_ref": "metadata-lt",
        "participant": speaker_id,
        "annotations": [{
            "id": _ann_id(ann_ctr),
            "ts1": ts1, "ts2": ts2,
            "value": meta_str,
        }],
    })
| # ── Public API ───────────────────────────────────────────────────────────── | |
def export_eaf(
    profile: dict[str, Any],
    audio_path: Path | str,
    output_path: Path | str | None = None,
    speaker_id: str = "anonymous",
    student_name: str = "Student",
    language: str = "en",
    l1_language: str = "bho",
    session_id: str | None = None,
) -> tuple[str, Path]:
    """Export analysis results to ELAN .eaf format.

    Builds the document with ``generate_eaf`` and writes it as UTF-8. Missing
    parent directories of the output file are created first, so an
    ``output_path`` inside a not-yet-existing folder no longer fails.

    Args:
        profile: Full pipeline output dict.
        audio_path: Path to source audio.
        output_path: Where to save the .eaf file. If None, saves next to audio
            (same name, ``.eaf`` suffix).
        speaker_id: Speaker identifier.
        student_name: Display name.
        language: Target language.
        l1_language: L1 language code.
        session_id: Session identifier.

    Returns:
        Tuple of (eaf_xml_string, output_path).
    """
    audio_path = Path(audio_path)
    if output_path is None:
        output_path = audio_path.with_suffix(".eaf")
    else:
        output_path = Path(output_path)

    eaf_xml = generate_eaf(
        profile=profile,
        audio_path=audio_path,
        speaker_id=speaker_id,
        student_name=student_name,
        language=language,
        l1_language=l1_language,
        session_id=session_id,
    )

    # Generate first, then touch the filesystem: a failed generation leaves
    # no empty directories behind.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(eaf_xml, encoding="utf-8")
    logger.info("ELAN export saved to %s", output_path)
    return eaf_xml, output_path