Spaces:

Shaankar39
/

vaani-cavp-engine

Build error

File size: 25,379 Bytes

7d5f092

"""ELAN Annotation Format (.eaf) Export

Generates standard ELAN XML files from voice profile analysis results.
Produces a multi-tier annotation corpus suitable for:
  - PhD linguistic research
  - Corpus-based phonological studies
  - Cross-speaker contrastive analysis
  - Longitudinal acquisition tracking

ELAN spec: https://archive.mpi.nl/tla/elan/documentation

Tiers generated:
  1. Transcription       — Word-level transcript with timestamps
  2. Phonemes            — Phone-level segmentation (from forced alignment or Wav2Vec)
  3. Prosody             — Intonation patterns, stress, rhythm annotations
  4. L1_Interference     — Detected L1 transfer patterns with severity
  5. Voice_Quality       — Phonation type, breathiness, creak annotations
  6. Cognitive_Load      — Filled pauses, hesitations, self-corrections
  7. Connected_Speech    — Assimilation, elision, linking events
  8. CIF_Score           — Contrastive Interference Index (CII): overall and per dimension
  9. Emotion             — Emotional valence/arousal labels
  10. Speaker_Metadata   — Speaker ID, language, session info
"""

from __future__ import annotations

import logging
import time
import uuid
from pathlib import Path
from typing import Any
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom

# Module-level logger; export_eaf() uses it to report where the file was saved.
logger = logging.getLogger(__name__)

# ELAN namespace and schema
# ELAN_SCHEMA is written to the root element's xsi:noNamespaceSchemaLocation
# attribute; ELAN_FORMAT is written to both its FORMAT and VERSION attributes.
ELAN_SCHEMA = "http://www.mpi.nl/tools/elan/EAFv3.0.xsd"
ELAN_FORMAT = "3.0"


def _ts_id(counter: list[int]) -> str:
    """Generate a unique time slot ID."""
    counter[0] += 1
    return f"ts{counter[0]}"


def _ann_id(counter: list[int]) -> str:
    """Generate a unique annotation ID."""
    counter[0] += 1
    return f"a{counter[0]}"


def _ms_to_elan(ms: float) -> int:
    """Convert milliseconds (float) to ELAN time value (int ms)."""
    return int(round(ms))


def generate_eaf(
    profile: dict[str, Any],
    audio_path: Path | str,
    speaker_id: str = "anonymous",
    student_name: str = "Student",
    language: str = "en",
    l1_language: str = "bho",
    session_id: str | None = None,
) -> str:
    """Generate a complete ELAN .eaf XML document from analysis results.

    The document is assembled in this order: root attributes, HEADER (media
    descriptor plus PROPERTY entries), TIME_ORDER, one TIER per tier builder,
    LINGUISTIC_TYPE entries and CONSTRAINT entries.

    Args:
        profile: Full pipeline output dict (all 10 layers).
        audio_path: Path to the source audio file. It is resolved to an
            absolute path for MEDIA_URL; only its file name is used for
            RELATIVE_MEDIA_URL.
        speaker_id: Unique speaker identifier.
        student_name: Display name.
        language: Target language code.
        l1_language: L1 language code.
        session_id: Optional session identifier. When falsy, a
            ``session_<unix-seconds>`` value is generated.

    Returns:
        EAF XML as a formatted (pretty-printed) string.
    """
    audio_path = Path(audio_path)
    session_id = session_id or f"session_{int(time.time())}"
    # One-element lists act as mutable counters shared with the tier builders.
    ts_counter = [0]
    ann_counter = [0]

    # ── Root element ─────────────────────────────────────────────────
    root = Element("ANNOTATION_DOCUMENT")
    root.set("AUTHOR", "Contrastive Voice Profiling Engine")
    # The literal "+00:00" offset is only truthful for UTC, so format the
    # UTC broken-down time rather than strftime's default local time.
    root.set("DATE", time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime()))
    root.set("FORMAT", ELAN_FORMAT)
    root.set("VERSION", ELAN_FORMAT)
    root.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
    root.set("xsi:noNamespaceSchemaLocation", ELAN_SCHEMA)

    # ── Header ───────────────────────────────────────────────────────
    header = SubElement(root, "HEADER")
    header.set("MEDIA_FILE", "")
    header.set("TIME_UNITS", "milliseconds")

    # Build "file:///<abs path>" with exactly three slashes and forward
    # slashes on every platform: POSIX absolute paths already start with "/",
    # Windows drive paths ("C:/...") need one added.
    media_path = audio_path.resolve().as_posix()
    if not media_path.startswith("/"):
        media_path = "/" + media_path

    media = SubElement(header, "MEDIA_DESCRIPTOR")
    media.set("MEDIA_URL", f"file://{media_path}")
    media.set("MIME_TYPE", "audio/x-wav")
    media.set("RELATIVE_MEDIA_URL", f"./{audio_path.name}")

    # Properties
    for key, val in [
        ("speaker_id", speaker_id),
        ("student_name", student_name),
        ("l1_language", l1_language),
        ("target_language", language),
        ("session_id", session_id),
        ("generator", "contrastive-voice-profiling-engine"),
    ]:
        prop = SubElement(header, "PROPERTY")
        prop.set("NAME", key)
        prop.text = str(val)

    # ── Collect all time points ──────────────────────────────────────
    time_slots: list[tuple[str, int]] = []

    def _add_ts(ms: float) -> str:
        """Register a time slot at ``ms`` and return its generated ID."""
        ts_id = _ts_id(ts_counter)
        time_slots.append((ts_id, _ms_to_elan(ms)))
        return ts_id

    # ── Build tiers ──────────────────────────────────────────────────
    # Each builder appends one tier dict to tiers_data and registers its
    # time slots through _add_ts.
    tiers_data: list[dict] = []

    # Tier 1: Transcription (word-level)
    _build_transcription_tier(profile, tiers_data, _add_ts, ann_counter)

    # Tier 2: Phonemes
    _build_phoneme_tier(profile, tiers_data, _add_ts, ann_counter)

    # Tier 3: Prosody
    _build_prosody_tier(profile, tiers_data, _add_ts, ann_counter)

    # Tier 4: L1 Interference
    _build_l1_tier(profile, tiers_data, _add_ts, ann_counter, l1_language)

    # Tier 5: Voice Quality
    _build_voice_quality_tier(profile, tiers_data, _add_ts, ann_counter)

    # Tier 6: Cognitive Load
    _build_cognitive_load_tier(profile, tiers_data, _add_ts, ann_counter)

    # Tier 7: Connected Speech
    _build_connected_speech_tier(profile, tiers_data, _add_ts, ann_counter)

    # Tier 8: CIF Score
    _build_cif_tier(profile, tiers_data, _add_ts, ann_counter)

    # Tier 9: Emotion
    _build_emotion_tier(profile, tiers_data, _add_ts, ann_counter)

    # Tier 10: Metadata (single span)
    _build_metadata_tier(
        profile, tiers_data, _add_ts, ann_counter,
        speaker_id, student_name, language, l1_language, session_id,
    )

    # ── Write TIME_ORDER ─────────────────────────────────────────────
    time_order = SubElement(root, "TIME_ORDER")
    # Sort by time value for valid EAF
    time_slots.sort(key=lambda x: x[1])
    for ts_id, ts_val in time_slots:
        ts_el = SubElement(time_order, "TIME_SLOT")
        ts_el.set("TIME_SLOT_ID", ts_id)
        ts_el.set("TIME_VALUE", str(ts_val))

    # ── Write TIERs ──────────────────────────────────────────────────
    for tier_info in tiers_data:
        tier_el = SubElement(root, "TIER")
        tier_el.set("LINGUISTIC_TYPE_REF", tier_info.get("type_ref", "default-lt"))
        tier_el.set("TIER_ID", tier_info["tier_id"])
        # PARTICIPANT / ANNOTATOR are optional and only written when a
        # builder supplied them.
        if "participant" in tier_info:
            tier_el.set("PARTICIPANT", tier_info["participant"])
        if "annotator" in tier_info:
            tier_el.set("ANNOTATOR", tier_info["annotator"])

        for ann in tier_info.get("annotations", []):
            ann_el = SubElement(tier_el, "ANNOTATION")
            align_ann = SubElement(ann_el, "ALIGNABLE_ANNOTATION")
            align_ann.set("ANNOTATION_ID", ann["id"])
            align_ann.set("TIME_SLOT_REF1", ann["ts1"])
            align_ann.set("TIME_SLOT_REF2", ann["ts2"])
            value_el = SubElement(align_ann, "ANNOTATION_VALUE")
            value_el.text = ann["value"]

    # ── Linguistic Types ─────────────────────────────────────────────
    # One type per "type_ref" value used by the tier builders.
    ling_types = [
        "default-lt", "phoneme-lt", "prosody-lt", "interference-lt",
        "voice-quality-lt", "cognitive-lt", "connected-speech-lt",
        "cif-lt", "emotion-lt", "metadata-lt",
    ]
    for lt in ling_types:
        lt_el = SubElement(root, "LINGUISTIC_TYPE")
        lt_el.set("GRAPHIC_REFERENCES", "false")
        lt_el.set("LINGUISTIC_TYPE_ID", lt)
        lt_el.set("TIME_ALIGNABLE", "true")

    # ── Constraints ──────────────────────────────────────────────────
    for stereo, desc in [
        ("Time_Subdivision", "Time subdivision of parent annotation's time interval"),
        ("Symbolic_Subdivision", "Symbolic subdivision of parent annotation's time interval"),
        ("Symbolic_Association", "1-1 association with a parent annotation"),
        ("Included_In", "Time included in parent annotation's time interval"),
    ]:
        con = SubElement(root, "CONSTRAINT")
        con.set("DESCRIPTION", desc)
        con.set("STEREOTYPE", stereo)

    # ── Format and return ────────────────────────────────────────────
    # Round-trip through minidom purely to get indented output.
    raw_xml = tostring(root, encoding="unicode")
    parsed = minidom.parseString(raw_xml)
    return parsed.toprettyxml(indent="  ", encoding=None)


# ── Tier Builders ────────────────────────────────────────────────────────

def _build_transcription_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 1: Word-level transcription with timestamps.

    Reads ``profile["transcription"]`` and appends one "Transcription" tier
    dict to ``tiers``. Entries of ``word_timestamps`` without a truthy
    ``"word"`` are skipped. If no word annotation results, the tier falls
    back to ``segments``, whose ``start``/``end`` are multiplied by 1000
    (i.e. treated as seconds).

    Word ``start``/``end`` values may arrive in seconds or milliseconds. The
    unit is decided once for the whole word list by comparing the latest
    ``end`` against ``duration_seconds``; when no positive duration is
    available, each word falls back to the ``start < 100`` guess. Either way
    the same scale is applied to both endpoints of a word, so a word that
    straddles the threshold can no longer end up with mixed units.

    Args:
        profile: Pipeline output dict.
        tiers: List that receives the tier dict (mutated in place).
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
    """
    trans = profile.get("transcription", {})
    words = [w for w in trans.get("word_timestamps", []) if w.get("word")]

    # Seconds-based timestamps cannot run far past the recording length,
    # whereas ms-based ones are ~1000x larger; a 2x margin tolerates slightly
    # inaccurate durations while still separating the two cases.
    duration_s = trans.get("duration_seconds") or 0
    list_in_seconds = None
    if words and duration_s > 0:
        latest_end = max(w.get("end", 0) for w in words)
        list_in_seconds = latest_end <= duration_s * 2

    annotations = []
    for w in words:
        start = w.get("start", 0)
        end = w.get("end", 0)
        if list_in_seconds is None:
            # No usable duration: guess from this word's start alone and
            # scale both endpoints together.
            in_seconds = start < 100
        else:
            in_seconds = list_in_seconds
        if in_seconds:
            start *= 1000
            end *= 1000
        ts1 = add_ts(start)
        ts2 = add_ts(end)
        annotations.append({
            "id": _ann_id(ann_ctr),
            "ts1": ts1, "ts2": ts2,
            "value": w["word"],
        })

    # If no word timestamps, use segment-level
    if not annotations:
        for seg in trans.get("segments", []):
            ts1 = add_ts(seg["start"] * 1000)
            ts2 = add_ts(seg["end"] * 1000)
            annotations.append({
                "id": _ann_id(ann_ctr),
                "ts1": ts1, "ts2": ts2,
                "value": seg.get("text", "").strip(),
            })

    tiers.append({
        "tier_id": "Transcription",
        "type_ref": "default-lt",
        "participant": "Speaker",
        "annotator": "whisper",
        "annotations": annotations,
    })


def _build_phoneme_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 2: Phone-level segmentation.

    Uses ``profile["forced_alignment"]["phones"]`` when it is non-empty and
    otherwise ``profile["phoneme_analysis"]["phoneme_details"]``. Each entry
    with a phone symbol becomes one annotation from ``start_ms`` to
    ``end_ms`` (or ``start_ms + duration_ms``, 50 by default). A truthy
    confidence is appended to the label together with the entry's source.
    One "Phonemes" tier dict is appended to ``tiers``.
    """
    segments = profile.get("forced_alignment", {}).get("phones", [])
    if not segments:
        # Nothing from forced alignment: use the phoneme analysis details.
        segments = profile.get("phoneme_analysis", {}).get("phoneme_details", [])

    rows = []
    for seg in segments:
        symbol = seg.get("phone") or seg.get("phoneme", "")
        begin_ms = seg.get("start_ms", 0)
        finish_ms = seg.get("end_ms", begin_ms + seg.get("duration_ms", 50))
        if symbol:
            slot_a = add_ts(begin_ms)
            slot_b = add_ts(finish_ms)
            confidence = seg.get("confidence", 0)
            origin = seg.get("source", "wav2vec")
            if confidence:
                text = f"{symbol} [{origin}:{confidence:.2f}]"
            else:
                text = symbol
            rows.append({
                "id": _ann_id(ann_ctr),
                "ts1": slot_a, "ts2": slot_b,
                "value": text,
            })

    tiers.append({
        "tier_id": "Phonemes",
        "type_ref": "phoneme-lt",
        "annotator": "forced_alignment",
        "annotations": rows,
    })


def _build_prosody_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 3: Prosodic annotations (intonation, stress, rhythm).

    Reads ``profile["prosodic_profile"]`` and appends one "Prosody" tier dict
    to ``tiers``. Intonation and rhythm summaries, when present, span the
    whole recording (0 to ``duration_seconds`` * 1000, defaulting to 5 s).
    Every ``stress_patterns`` entry flagged ``stressed`` gets its own
    annotation; an entry without ``end`` is given a 200 ms span.

    Args:
        profile: Pipeline output dict.
        tiers: List that receives the tier dict (mutated in place).
        add_ts: Callable taking a time in ms and returning a time slot ID.
        ann_ctr: One-element list used as the shared annotation counter.
    """
    prosody = profile.get("prosodic_profile", {})
    annotations = []

    intonation = prosody.get("intonation", {})
    rhythm = prosody.get("rhythm", {})

    # Both summary annotations cover the same whole-recording span, so the
    # duration is looked up once, and only when at least one of them exists.
    if intonation or rhythm:
        duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

    # Intonation pattern annotations
    if intonation:
        pattern = intonation.get("pattern", "unknown")
        boundary_tones = intonation.get("boundary_tones", [])

        ts1 = add_ts(0)
        ts2 = add_ts(duration_ms)
        annotations.append({
            "id": _ann_id(ann_ctr),
            "ts1": ts1, "ts2": ts2,
            "value": f"Intonation: {pattern} | Tones: {', '.join(boundary_tones) if boundary_tones else 'N/A'}",
        })

    # Rhythm classification
    if rhythm:
        rhythm_class = rhythm.get("rhythm_class", "unknown")
        npvi = rhythm.get("nPVI_V", 0)
        ts1 = add_ts(0)
        ts2 = add_ts(duration_ms)
        annotations.append({
            "id": _ann_id(ann_ctr),
            "ts1": ts1, "ts2": ts2,
            "value": f"Rhythm: {rhythm_class} (nPVI={npvi:.1f})",
        })

    # Stressed words
    for s in prosody.get("stress_patterns", []):
        if not s.get("stressed"):
            continue
        start = s.get("start", 0)
        end = s.get("end")
        # Convert seconds to ms if needed (values below 100 are taken to be
        # seconds; both endpoints use the same scale).
        if start < 100:
            start *= 1000
            if end is not None:
                end *= 1000
        # Apply the 200 ms default only after unit conversion; adding it
        # before scaling would stretch a seconds-based entry to 200 s.
        if end is None:
            end = start + 200
        ts1 = add_ts(start)
        ts2 = add_ts(end)
        annotations.append({
            "id": _ann_id(ann_ctr),
            "ts1": ts1, "ts2": ts2,
            "value": f"STRESS: {s.get('word', '')}",
        })

    tiers.append({
        "tier_id": "Prosody",
        "type_ref": "prosody-lt",
        "annotator": "prosodic_profiling",
        "annotations": annotations,
    })


def _build_l1_tier(profile, tiers, add_ts, ann_ctr, l1_language):
    """Tier 4: L1 interference patterns.

    Emits one whole-recording annotation with the overall interference
    score, followed by one whole-recording annotation per entry of
    ``profile["l1_interference"]["detected_patterns"]`` (severity, name and,
    when present, evidence and remediation). ``l1_language`` is the label
    used when the profile carries no ``l1_display_name``. One
    "L1_Interference" tier dict is appended to ``tiers``.
    """
    report = profile.get("l1_interference", {})
    span_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

    def _whole_recording():
        # Fresh pair of time slots covering 0 .. recording length.
        return add_ts(0), add_ts(span_ms)

    # Overall interference score
    score = report.get("interference_score", 0)
    l1_label = report.get("l1_display_name", l1_language)
    begin, finish = _whole_recording()
    rows = [{
        "id": _ann_id(ann_ctr),
        "ts1": begin, "ts2": finish,
        "value": f"L1={l1_label} | Interference={score}/100",
    }]

    # Individual detected patterns
    for entry in report.get("detected_patterns", []):
        label = entry.get("pattern") or entry.get("name", "unknown")
        level = entry.get("severity", "low")
        proof = entry.get("evidence", "")
        fix = entry.get("remediation", "")

        begin, finish = _whole_recording()
        pieces = [f"[{level.upper()}] {label}"]
        if proof:
            pieces.append(f"{proof}")
        if fix:
            pieces.append(f"FIX: {fix}")

        rows.append({
            "id": _ann_id(ann_ctr),
            "ts1": begin, "ts2": finish,
            "value": " | ".join(pieces),
        })

    tiers.append({
        "tier_id": "L1_Interference",
        "type_ref": "interference-lt",
        "annotator": "l1_targets",
        "annotations": rows,
    })


def _build_voice_quality_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 5: Voice quality annotations.

    Always emits a phonation/register summary; adds a VoiceSauce measures
    line when ``profile["voicesauce"]`` is non-empty and a nasality line when
    ``profile["voice_quality"]["nasality"]`` is non-empty. Every annotation
    spans the whole recording. One "Voice_Quality" tier dict is appended to
    ``tiers``.
    """
    quality = profile.get("voice_quality", {})
    sauce = profile.get("voicesauce", {})
    rows = []

    span_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

    def _whole_recording():
        # Fresh pair of time slots covering 0 .. recording length.
        return add_ts(0), add_ts(span_ms)

    # Phonation type: VoiceSauce's value wins, else the breathiness class.
    phonation = sauce.get("phonation_type")
    if not phonation:
        phonation = quality.get("breathiness", {}).get("classification", "modal")
    register = quality.get("register", {}).get("type", "unknown")

    begin, finish = _whole_recording()
    rows.append({
        "id": _ann_id(ann_ctr),
        "ts1": begin, "ts2": finish,
        "value": f"Phonation: {phonation} | Register: {register}",
    })

    # VoiceSauce measures
    if sauce:
        h1h2 = sauce.get("H1_H2", {})
        cpp = sauce.get("CPP", {})
        shr = sauce.get("SHR", {})
        begin, finish = _whole_recording()
        measures = " | ".join([
            f"H1-H2={h1h2.get('mean', 0):.1f}dB",
            f"CPP={cpp.get('mean', 0):.1f}dB",
            f"SHR={shr.get('mean', 0):.3f}",
            f"Breathiness={sauce.get('breathiness_index', 0):.2f}",
            f"Creak={sauce.get('creak_index', 0):.2f}",
        ])
        rows.append({
            "id": _ann_id(ann_ctr),
            "ts1": begin, "ts2": finish,
            "value": measures,
        })

    # Nasality
    nasality = quality.get("nasality", {})
    if nasality:
        begin, finish = _whole_recording()
        rows.append({
            "id": _ann_id(ann_ctr),
            "ts1": begin, "ts2": finish,
            "value": f"Nasality index: {nasality.get('nasality_index', 0):.2f}",
        })

    tiers.append({
        "tier_id": "Voice_Quality",
        "type_ref": "voice-quality-lt",
        "annotator": "voice_quality+voicesauce",
        "annotations": rows,
    })


def _build_cognitive_load_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 6: Cognitive load markers (filled pauses, hesitations).

    Walks ``profile["morpheme_boundary"]["cognitive_load"]["indicators"]``:
    string indicators become whole-recording annotations, dict indicators
    are placed at ``start_ms``..``end_ms`` (200 ms long when ``end_ms`` is
    absent), and any other type is ignored. A positive overall ``score``
    adds one more whole-recording annotation. One "Cognitive_Load" tier dict
    is appended to ``tiers``.
    """
    load = profile.get("morpheme_boundary", {}).get("cognitive_load", {})
    rows = []

    def _whole_recording():
        # The recording length is looked up only when a whole-recording
        # span is actually needed.
        span_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
        return add_ts(0), add_ts(span_ms)

    # Filled pauses
    for marker in load.get("indicators", []):
        if isinstance(marker, str):
            # Simple string indicator: utterance-level annotation
            begin, finish = _whole_recording()
            text = f"COGNITIVE: {marker}"
        elif isinstance(marker, dict):
            begin_ms = marker.get("start_ms", 0)
            finish_ms = marker.get("end_ms", begin_ms + 200)
            begin = add_ts(begin_ms)
            finish = add_ts(finish_ms)
            text = f"COGNITIVE: {marker.get('type', 'marker')} — {marker.get('description', '')}"
        else:
            continue
        rows.append({
            "id": _ann_id(ann_ctr),
            "ts1": begin, "ts2": finish,
            "value": text,
        })

    # Overall score
    total = load.get("score", 0)
    if total > 0:
        begin, finish = _whole_recording()
        rows.append({
            "id": _ann_id(ann_ctr),
            "ts1": begin, "ts2": finish,
            "value": f"Cognitive Load Score: {total}/100",
        })

    tiers.append({
        "tier_id": "Cognitive_Load",
        "type_ref": "cognitive-lt",
        "annotator": "morpheme_boundary",
        "annotations": rows,
    })


def _build_connected_speech_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 7: Connected speech processes (assimilation, elision, linking).

    For each of the ``assimilations``, ``elisions``, ``linkings`` and
    ``reductions`` lists in ``profile["connected_speech"]``, every dict event
    becomes one annotation from ``start_ms`` (or ``position_ms``) to
    ``end_ms`` (150 ms long when absent); non-dict events are ignored. A
    positive ``fluency_score`` adds a whole-recording annotation. One
    "Connected_Speech" tier dict is appended to ``tiers``.
    """
    speech = profile.get("connected_speech", {})
    rows = []

    for kind in ("assimilations", "elisions", "linkings", "reductions"):
        # Singular forms of the list name: lower-case for the default label,
        # upper-case for the bracketed tag.
        singular = kind.rstrip("s")
        tag = kind.upper().rstrip('S')
        for event in speech.get(kind, []):
            if not isinstance(event, dict):
                continue
            begin_ms = event.get("start_ms", event.get("position_ms", 0))
            finish_ms = event.get("end_ms", begin_ms + 150)
            name = event.get("label") or event.get("type", singular)
            surroundings = event.get("context", "")

            begin = add_ts(begin_ms)
            finish = add_ts(finish_ms)
            text = f"[{tag}] {name}"
            if surroundings:
                text = f"{text} | {surroundings}"
            rows.append({
                "id": _ann_id(ann_ctr),
                "ts1": begin, "ts2": finish,
                "value": text,
            })

    # Fluency score
    fluency = speech.get("fluency_score", 0)
    if fluency > 0:
        span_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
        begin = add_ts(0)
        finish = add_ts(span_ms)
        rows.append({
            "id": _ann_id(ann_ctr),
            "ts1": begin, "ts2": finish,
            "value": f"Fluency Score: {fluency}/100",
        })

    tiers.append({
        "tier_id": "Connected_Speech",
        "type_ref": "connected-speech-lt",
        "annotator": "connected_speech",
        "annotations": rows,
    })


def _build_cif_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 8: CIF (Contrastive Interference Field) scores.

    Emits the overall CII with its severity label, then one annotation per
    dict-valued entry of ``profile["cif_analysis"]["dimensions"]``. Every
    annotation spans the whole recording. One "CIF_Score" tier dict is
    appended to ``tiers``.
    """
    analysis = profile.get("cif_analysis", {})
    span_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

    def _whole_recording():
        # Fresh pair of time slots covering 0 .. recording length.
        return add_ts(0), add_ts(span_ms)

    overall = analysis.get("overall_cii", 0)
    level = analysis.get("overall_severity", "unknown")

    begin, finish = _whole_recording()
    rows = [{
        "id": _ann_id(ann_ctr),
        "ts1": begin, "ts2": finish,
        "value": f"CII={overall:.3f} [{level}]",
    }]

    # Per-dimension CII
    for axis, details in analysis.get("dimensions", {}).items():
        if not isinstance(details, dict):
            continue
        axis_cii = details.get("cii", 0)
        begin, finish = _whole_recording()
        rows.append({
            "id": _ann_id(ann_ctr),
            "ts1": begin, "ts2": finish,
            "value": f"CIF-{axis}: {axis_cii:.3f}",
        })

    tiers.append({
        "tier_id": "CIF_Score",
        "type_ref": "cif-lt",
        "annotator": "cif_model",
        "annotations": rows,
    })


def _build_emotion_tier(profile, tiers, add_ts, ann_ctr):
    """Tier 9: Emotional analysis annotations.

    Adds a SpeechBrain line when
    ``profile["ai_classification"]["speechbrain"]["emotion"]`` is non-empty
    and an auDeep line (label, confidence, valence/arousal/dominance) when
    ``profile["audeep"]`` is non-empty. Both span the whole recording. One
    "Emotion" tier dict is appended to ``tiers``.
    """
    rows = []
    span_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

    def _whole_recording():
        # Fresh pair of time slots covering 0 .. recording length.
        return add_ts(0), add_ts(span_ms)

    # SpeechBrain emotion
    speechbrain = profile.get("ai_classification", {}).get("speechbrain", {})
    sb_emotion = speechbrain.get("emotion", {}) if speechbrain else {}
    if sb_emotion:
        name = sb_emotion.get("label", "unknown")
        certainty = sb_emotion.get("confidence", 0)
        begin, finish = _whole_recording()
        rows.append({
            "id": _ann_id(ann_ctr),
            "ts1": begin, "ts2": finish,
            "value": f"SpeechBrain: {name} ({certainty:.2f})",
        })

    # auDeep emotion
    deep = profile.get("audeep", {})
    if deep:
        name = deep.get("primary_emotion", "neutral")
        certainty = deep.get("emotion_confidence", 0)
        valence = deep.get("valence", 0)
        arousal = deep.get("arousal", 0)
        dominance = deep.get("dominance", 0)
        begin, finish = _whole_recording()
        rows.append({
            "id": _ann_id(ann_ctr),
            "ts1": begin, "ts2": finish,
            "value": (
                f"auDeep: {name} ({certainty:.2f}) | "
                f"V={valence:.2f} A={arousal:.2f} D={dominance:.2f}"
            ),
        })

    tiers.append({
        "tier_id": "Emotion",
        "type_ref": "emotion-lt",
        "annotator": "speechbrain+audeep",
        "annotations": rows,
    })


def _build_metadata_tier(profile, tiers, add_ts, ann_ctr,
                         speaker_id, student_name, language, l1_language, session_id):
    """Tier 10: Session metadata.

    Appends a "Speaker_Metadata" tier holding a single whole-recording
    annotation that lists the speaker, the L1 and target languages, the
    session ID and ``profile["processing_time_ms"]``. ``speaker_id`` is also
    used as the tier's participant.
    """
    span_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
    elapsed_ms = profile.get("processing_time_ms", 0)

    begin = add_ts(0)
    finish = add_ts(span_ms)

    summary = " | ".join([
        f"Speaker: {speaker_id} ({student_name})",
        f"L1: {l1_language} → L2: {language}",
        f"Session: {session_id}",
        f"Processing: {elapsed_ms:.0f}ms",
    ])

    only_annotation = {
        "id": _ann_id(ann_ctr),
        "ts1": begin, "ts2": finish,
        "value": summary,
    }
    tiers.append({
        "tier_id": "Speaker_Metadata",
        "type_ref": "metadata-lt",
        "participant": speaker_id,
        "annotations": [only_annotation],
    })


# ── Public API ───────────────────────────────────────────────────────────

def export_eaf(
    profile: dict[str, Any],
    audio_path: Path | str,
    output_path: Path | str | None = None,
    speaker_id: str = "anonymous",
    student_name: str = "Student",
    language: str = "en",
    l1_language: str = "bho",
    session_id: str | None = None,
) -> tuple[str, Path]:
    """Export analysis results to ELAN .eaf format.

    Generates the EAF document with ``generate_eaf`` and writes it to disk as
    UTF-8, creating the destination directory first if it does not exist.

    Args:
        profile: Full pipeline output dict.
        audio_path: Path to source audio.
        output_path: Where to save the .eaf file. If None, saves next to
            audio (same name with an ``.eaf`` suffix).
        speaker_id: Speaker identifier.
        student_name: Display name.
        language: Target language.
        l1_language: L1 language code.
        session_id: Session identifier.

    Returns:
        Tuple of (eaf_xml_string, output_path), with output_path as a Path.
    """
    audio_path = Path(audio_path)
    if output_path is None:
        output_path = audio_path.with_suffix(".eaf")
    else:
        output_path = Path(output_path)

    eaf_xml = generate_eaf(
        profile=profile,
        audio_path=audio_path,
        speaker_id=speaker_id,
        student_name=student_name,
        language=language,
        l1_language=l1_language,
        session_id=session_id,
    )

    # Create the target directory only after generation succeeded, so a
    # failed export leaves no empty directories behind.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(eaf_xml, encoding="utf-8")
    logger.info("ELAN export saved to %s", output_path)

    return eaf_xml, output_path