Spaces:

Shaankar39
/

vaani-cavp-engine

Build error

App Files Files Community

vaani-cavp-engine / modules /elan_export.py

Shaankar39

init: Vaani CAVP engine (CPU, accuracy-first — Whisper large-v3, spaCy trf)

7d5f092 about 1 month ago

raw

history blame contribute delete

25.4 kB

	"""ELAN Annotation Format (.eaf) Export

	Generates standard ELAN XML files from voice profile analysis results.
	Produces a multi-tier annotation corpus suitable for:
	- PhD linguistic research
	- Corpus-based phonological studies
	- Cross-speaker contrastive analysis
	- Longitudinal acquisition tracking

	ELAN spec: https://archive.mpi.nl/tla/elan/documentation

	Tiers generated:
	1. Transcription — Word-level transcript with timestamps
	2. Phonemes — Phone-level segmentation (from forced alignment or Wav2Vec)
	3. Prosody — Intonation patterns, stress, rhythm annotations
	4. L1_Interference — Detected L1 transfer patterns with severity
	5. Voice_Quality — Phonation type, breathiness, creak annotations
	6. Cognitive_Load — Filled pauses, hesitations, self-corrections
	7. Connected_Speech — Assimilation, elision, linking events
	8. CIF_Score — Overall and per-dimension Contrastive Interference Index (CII) for the whole utterance
	9. Emotion — Emotional valence/arousal labels
	10. Speaker_Metadata — Speaker ID, language, session info
	"""

	from __future__ import annotations

	import logging
	import time
	import uuid
	from pathlib import Path
	from typing import Any
	from xml.etree.ElementTree import Element, SubElement, tostring
	from xml.dom import minidom

	# Module-level logger; export_eaf uses it to report where the .eaf file was saved.
	logger = logging.getLogger(__name__)

	# ELAN namespace and schema
	# ELAN_SCHEMA is written to the root element's xsi:noNamespaceSchemaLocation attribute.
	ELAN_SCHEMA = "http://www.mpi.nl/tools/elan/EAFv3.0.xsd"
	# ELAN_FORMAT is written to both the FORMAT and VERSION attributes of the root element.
	ELAN_FORMAT = "3.0"


	def _ts_id(counter: list[int]) -> str:
	"""Generate a unique time slot ID."""
	counter[0] += 1
	return f"ts{counter[0]}"


	def _ann_id(counter: list[int]) -> str:
	"""Generate a unique annotation ID."""
	counter[0] += 1
	return f"a{counter[0]}"


	def _ms_to_elan(ms: float) -> int:
	"""Convert milliseconds (float) to ELAN time value (int ms)."""
	return int(round(ms))


	def generate_eaf(
	    profile: dict[str, Any],
	    audio_path: Path | str,
	    speaker_id: str = "anonymous",
	    student_name: str = "Student",
	    language: str = "en",
	    l1_language: str = "bho",
	    session_id: str | None = None,
	) -> str:
	    """Generate a complete ELAN .eaf XML document from analysis results.

	    The document is assembled in this order: HEADER (media descriptor and
	    properties), TIME_ORDER, one TIER per tier builder, the LINGUISTIC_TYPE
	    declarations, and the four stereotype CONSTRAINT declarations.

	    Args:
	        profile: Full pipeline output dict; passed unchanged to every
	            ``_build_*_tier`` helper.
	        audio_path: Path to the source audio file.
	        speaker_id: Unique speaker identifier.
	        student_name: Display name.
	        language: Target language code.
	        l1_language: L1 language code.
	        session_id: Optional session identifier; when falsy, one is derived
	            from the current Unix time.

	    Returns:
	        EAF XML as a pretty-printed string.
	    """
	    audio_path = Path(audio_path)
	    session_id = session_id or f"session_{int(time.time())}"
	    # One-element lists act as mutable counters shared with the tier builders.
	    ts_counter = [0]
	    ann_counter = [0]

	    # ── Root element ─────────────────────────────────────────────────
	    root = Element("ANNOTATION_DOCUMENT")
	    root.set("AUTHOR", "Contrastive Voice Profiling Engine")
	    # The literal "+00:00" suffix declares UTC, so the clock reading must be
	    # UTC as well (strftime without a time tuple formats local time).
	    root.set("DATE", time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime()))
	    root.set("FORMAT", ELAN_FORMAT)
	    root.set("VERSION", ELAN_FORMAT)
	    root.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
	    root.set("xsi:noNamespaceSchemaLocation", ELAN_SCHEMA)

	    # ── Header ───────────────────────────────────────────────────────
	    header = SubElement(root, "HEADER")
	    header.set("MEDIA_FILE", "")
	    header.set("TIME_UNITS", "milliseconds")

	    media = SubElement(header, "MEDIA_DESCRIPTOR")
	    # Forward slashes on every platform, and no leading slash of our own, so
	    # POSIX paths give file:///abs/path rather than file:////abs/path.
	    absolute_media = audio_path.resolve().as_posix().lstrip("/")
	    media.set("MEDIA_URL", f"file:///{absolute_media}")
	    media.set("MIME_TYPE", "audio/x-wav")
	    media.set("RELATIVE_MEDIA_URL", f"./{audio_path.name}")

	    # Properties
	    for key, val in [
	        ("speaker_id", speaker_id),
	        ("student_name", student_name),
	        ("l1_language", l1_language),
	        ("target_language", language),
	        ("session_id", session_id),
	        ("generator", "contrastive-voice-profiling-engine"),
	    ]:
	        prop = SubElement(header, "PROPERTY")
	        prop.set("NAME", key)
	        prop.text = str(val)

	    # ── Collect all time points ──────────────────────────────────────
	    time_slots: list[tuple[str, int]] = []

	    def _add_ts(ms: float) -> str:
	        """Register a time point and return the ID of its new time slot."""
	        ts_id = _ts_id(ts_counter)
	        time_slots.append((ts_id, _ms_to_elan(ms)))
	        return ts_id

	    # ── Build tiers ──────────────────────────────────────────────────
	    # Each builder appends one tier dict to tiers_data and registers its
	    # time points through _add_ts.
	    tiers_data: list[dict] = []

	    # Tier 1: Transcription (word-level)
	    _build_transcription_tier(profile, tiers_data, _add_ts, ann_counter)

	    # Tier 2: Phonemes
	    _build_phoneme_tier(profile, tiers_data, _add_ts, ann_counter)

	    # Tier 3: Prosody
	    _build_prosody_tier(profile, tiers_data, _add_ts, ann_counter)

	    # Tier 4: L1 Interference
	    _build_l1_tier(profile, tiers_data, _add_ts, ann_counter, l1_language)

	    # Tier 5: Voice Quality
	    _build_voice_quality_tier(profile, tiers_data, _add_ts, ann_counter)

	    # Tier 6: Cognitive Load
	    _build_cognitive_load_tier(profile, tiers_data, _add_ts, ann_counter)

	    # Tier 7: Connected Speech
	    _build_connected_speech_tier(profile, tiers_data, _add_ts, ann_counter)

	    # Tier 8: CIF Score
	    _build_cif_tier(profile, tiers_data, _add_ts, ann_counter)

	    # Tier 9: Emotion
	    _build_emotion_tier(profile, tiers_data, _add_ts, ann_counter)

	    # Tier 10: Metadata (single span)
	    _build_metadata_tier(
	        profile, tiers_data, _add_ts, ann_counter,
	        speaker_id, student_name, language, l1_language, session_id,
	    )

	    # ── Write TIME_ORDER ─────────────────────────────────────────────
	    time_order = SubElement(root, "TIME_ORDER")
	    # Sort by time value for valid EAF
	    time_slots.sort(key=lambda slot: slot[1])
	    for ts_id, ts_val in time_slots:
	        ts_el = SubElement(time_order, "TIME_SLOT")
	        ts_el.set("TIME_SLOT_ID", ts_id)
	        ts_el.set("TIME_VALUE", str(ts_val))

	    # ── Write TIERs ──────────────────────────────────────────────────
	    for tier_info in tiers_data:
	        tier_el = SubElement(root, "TIER")
	        tier_el.set("LINGUISTIC_TYPE_REF", tier_info.get("type_ref", "default-lt"))
	        tier_el.set("TIER_ID", tier_info["tier_id"])
	        if "participant" in tier_info:
	            tier_el.set("PARTICIPANT", tier_info["participant"])
	        if "annotator" in tier_info:
	            tier_el.set("ANNOTATOR", tier_info["annotator"])

	        for ann in tier_info.get("annotations", []):
	            ann_el = SubElement(tier_el, "ANNOTATION")
	            align_ann = SubElement(ann_el, "ALIGNABLE_ANNOTATION")
	            align_ann.set("ANNOTATION_ID", ann["id"])
	            align_ann.set("TIME_SLOT_REF1", ann["ts1"])
	            align_ann.set("TIME_SLOT_REF2", ann["ts2"])
	            value_el = SubElement(align_ann, "ANNOTATION_VALUE")
	            # ElementTree can only serialise text, so coerce defensively.
	            value_el.text = str(ann["value"])

	    # ── Linguistic Types ─────────────────────────────────────────────
	    ling_types = [
	        "default-lt", "phoneme-lt", "prosody-lt", "interference-lt",
	        "voice-quality-lt", "cognitive-lt", "connected-speech-lt",
	        "cif-lt", "emotion-lt", "metadata-lt",
	    ]
	    for lt in ling_types:
	        lt_el = SubElement(root, "LINGUISTIC_TYPE")
	        lt_el.set("GRAPHIC_REFERENCES", "false")
	        lt_el.set("LINGUISTIC_TYPE_ID", lt)
	        lt_el.set("TIME_ALIGNABLE", "true")

	    # ── Constraints ──────────────────────────────────────────────────
	    for stereo, desc in [
	        ("Time_Subdivision", "Time subdivision of parent annotation's time interval"),
	        ("Symbolic_Subdivision", "Symbolic subdivision of parent annotation's time interval"),
	        ("Symbolic_Association", "1-1 association with a parent annotation"),
	        ("Included_In", "Time included in parent annotation's time interval"),
	    ]:
	        con = SubElement(root, "CONSTRAINT")
	        con.set("DESCRIPTION", desc)
	        con.set("STEREOTYPE", stereo)

	    # ── Format and return ────────────────────────────────────────────
	    raw_xml = tostring(root, encoding="unicode")
	    parsed = minidom.parseString(raw_xml)
	    return parsed.toprettyxml(indent=" ", encoding=None)


	# ── Tier Builders ────────────────────────────────────────────────────────

	def _build_transcription_tier(profile, tiers, add_ts, ann_ctr):
	"""Tier 1: Word-level transcription with timestamps."""
	trans = profile.get("transcription", {})
	word_ts = trans.get("word_timestamps", [])

	annotations = []
	for w in word_ts:
	if not w.get("word"):
	continue
	start = w.get("start", 0) * 1000 if w.get("start", 0) < 100 else w.get("start", 0)
	end = w.get("end", 0) * 1000 if w.get("end", 0) < 100 else w.get("end", 0)
	ts1 = add_ts(start)
	ts2 = add_ts(end)
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": w["word"],
	})

	# If no word timestamps, use segment-level
	if not annotations:
	for seg in trans.get("segments", []):
	ts1 = add_ts(seg["start"] * 1000)
	ts2 = add_ts(seg["end"] * 1000)
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": seg.get("text", "").strip(),
	})

	tiers.append({
	"tier_id": "Transcription",
	"type_ref": "default-lt",
	"participant": "Speaker",
	"annotator": "whisper",
	"annotations": annotations,
	})


	def _build_phoneme_tier(profile, tiers, add_ts, ann_ctr):
	"""Tier 2: Phone-level segmentation."""
	# Try forced alignment first, fall back to phoneme_analysis
	fa = profile.get("forced_alignment", {})
	phones = fa.get("phones", [])

	# Fall back to Wav2Vec phoneme spans
	if not phones:
	pa = profile.get("phoneme_analysis", {})
	phones = pa.get("phoneme_details", [])

	annotations = []
	for p in phones:
	phone = p.get("phone") or p.get("phoneme", "")
	start = p.get("start_ms", 0)
	end = p.get("end_ms", start + p.get("duration_ms", 50))
	if not phone:
	continue
	ts1 = add_ts(start)
	ts2 = add_ts(end)
	conf = p.get("confidence", 0)
	source = p.get("source", "wav2vec")
	label = f"{phone} [{source}:{conf:.2f}]" if conf else phone
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": label,
	})

	tiers.append({
	"tier_id": "Phonemes",
	"type_ref": "phoneme-lt",
	"annotator": "forced_alignment",
	"annotations": annotations,
	})


	def _build_prosody_tier(profile, tiers, add_ts, ann_ctr):
	"""Tier 3: Prosodic annotations (intonation, stress, rhythm)."""
	prosody = profile.get("prosodic_profile", {})
	annotations = []

	# Intonation pattern annotations
	intonation = prosody.get("intonation", {})
	if intonation:
	pattern = intonation.get("pattern", "unknown")
	boundary_tones = intonation.get("boundary_tones", [])
	duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

	ts1 = add_ts(0)
	ts2 = add_ts(duration_ms)
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": f"Intonation: {pattern} \| Tones: {', '.join(boundary_tones) if boundary_tones else 'N/A'}",
	})

	# Rhythm classification
	rhythm = prosody.get("rhythm", {})
	if rhythm:
	rhythm_class = rhythm.get("rhythm_class", "unknown")
	npvi = rhythm.get("nPVI_V", 0)
	duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
	ts1 = add_ts(0)
	ts2 = add_ts(duration_ms)
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": f"Rhythm: {rhythm_class} (nPVI={npvi:.1f})",
	})

	# Stressed words
	stress = prosody.get("stress_patterns", [])
	for s in stress:
	if s.get("stressed"):
	start = s.get("start", 0)
	end = s.get("end", start + 200)
	# Convert seconds to ms if needed
	if start < 100:
	start *= 1000
	end *= 1000
	ts1 = add_ts(start)
	ts2 = add_ts(end)
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": f"STRESS: {s.get('word', '')}",
	})

	tiers.append({
	"tier_id": "Prosody",
	"type_ref": "prosody-lt",
	"annotator": "prosodic_profiling",
	"annotations": annotations,
	})


	def _build_l1_tier(profile, tiers, add_ts, ann_ctr, l1_language):
	    """Tier 4: L1 interference patterns.

	    Reads ``profile["l1_interference"]``. Emits one utterance-wide summary
	    annotation (L1 name and interference score) followed by one
	    utterance-wide annotation per entry of ``detected_patterns``.

	    Args:
	        profile: Pipeline output dict.
	        tiers: List that receives the finished tier dict.
	        add_ts: Callable registering a millisecond time point and returning
	            its time-slot ID.
	        ann_ctr: Shared one-element annotation counter.
	        l1_language: Used as the display name when the profile supplies no
	            ``l1_display_name``.
	    """
	    l1 = profile.get("l1_interference", {})
	    annotations = []

	    # Utterance length in ms; 5 s is assumed when the transcription has none.
	    duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

	    # Overall interference score
	    interference_score = l1.get("interference_score", 0)
	    display_name = l1.get("l1_display_name", l1_language)
	    ts1 = add_ts(0)
	    ts2 = add_ts(duration_ms)
	    annotations.append({
	        "id": _ann_id(ann_ctr),
	        "ts1": ts1, "ts2": ts2,
	        "value": f"L1={display_name} | Interference={interference_score}/100",
	    })

	    # Individual detected patterns
	    for pat in l1.get("detected_patterns", []):
	        name = pat.get("pattern") or pat.get("name", "unknown")
	        # Coerce so a missing or non-string severity cannot break .upper().
	        severity = str(pat.get("severity") or "low")
	        evidence = pat.get("evidence", "")
	        remediation = pat.get("remediation", "")

	        ts1 = add_ts(0)
	        ts2 = add_ts(duration_ms)
	        value = f"[{severity.upper()}] {name}"
	        if evidence:
	            value += f" | {evidence}"
	        if remediation:
	            value += f" | FIX: {remediation}"

	        annotations.append({
	            "id": _ann_id(ann_ctr),
	            "ts1": ts1, "ts2": ts2,
	            "value": value,
	        })

	    tiers.append({
	        "tier_id": "L1_Interference",
	        "type_ref": "interference-lt",
	        "annotator": "l1_targets",
	        "annotations": annotations,
	    })


	def _build_voice_quality_tier(profile, tiers, add_ts, ann_ctr):
	    """Tier 5: Voice quality annotations.

	    Reads ``profile["voice_quality"]`` and ``profile["voicesauce"]``. Every
	    annotation spans the whole utterance: phonation/register always, the
	    VoiceSauce measures when that section is non-empty, and the nasality
	    index when ``voice_quality["nasality"]`` is non-empty.

	    Args:
	        profile: Pipeline output dict.
	        tiers: List that receives the finished tier dict.
	        add_ts: Callable registering a millisecond time point and returning
	            its time-slot ID.
	        ann_ctr: Shared one-element annotation counter.
	    """
	    vq = profile.get("voice_quality", {})
	    vs = profile.get("voicesauce", {})
	    annotations = []

	    # Utterance length in ms; 5 s is assumed when the transcription has none.
	    duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

	    # Phonation type: the VoiceSauce value wins, then the breathiness
	    # classification, then "modal".
	    phonation = vs.get("phonation_type") or vq.get("breathiness", {}).get("classification", "modal")
	    register = vq.get("register", {}).get("type", "unknown")

	    ts1 = add_ts(0)
	    ts2 = add_ts(duration_ms)
	    annotations.append({
	        "id": _ann_id(ann_ctr),
	        "ts1": ts1, "ts2": ts2,
	        "value": f"Phonation: {phonation} | Register: {register}",
	    })

	    # VoiceSauce measures
	    if vs:
	        h1h2 = vs.get("H1_H2", {})
	        cpp = vs.get("CPP", {})
	        shr = vs.get("SHR", {})
	        ts1 = add_ts(0)
	        ts2 = add_ts(duration_ms)
	        annotations.append({
	            "id": _ann_id(ann_ctr),
	            "ts1": ts1, "ts2": ts2,
	            "value": (
	                f"H1-H2={h1h2.get('mean', 0):.1f}dB | "
	                f"CPP={cpp.get('mean', 0):.1f}dB | "
	                f"SHR={shr.get('mean', 0):.3f} | "
	                f"Breathiness={vs.get('breathiness_index', 0):.2f} | "
	                f"Creak={vs.get('creak_index', 0):.2f}"
	            ),
	        })

	    # Nasality
	    nas = vq.get("nasality", {})
	    if nas:
	        ts1 = add_ts(0)
	        ts2 = add_ts(duration_ms)
	        annotations.append({
	            "id": _ann_id(ann_ctr),
	            "ts1": ts1, "ts2": ts2,
	            "value": f"Nasality index: {nas.get('nasality_index', 0):.2f}",
	        })

	    tiers.append({
	        "tier_id": "Voice_Quality",
	        "type_ref": "voice-quality-lt",
	        "annotator": "voice_quality+voicesauce",
	        "annotations": annotations,
	    })


	def _build_cognitive_load_tier(profile, tiers, add_ts, ann_ctr):
	"""Tier 6: Cognitive load markers (filled pauses, hesitations)."""
	mb = profile.get("morpheme_boundary", {})
	cog = mb.get("cognitive_load", {})
	annotations = []

	# Filled pauses
	indicators = cog.get("indicators", [])
	for ind in indicators:
	if isinstance(ind, str):
	# Simple string indicator — create utterance-level annotation
	duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
	ts1 = add_ts(0)
	ts2 = add_ts(duration_ms)
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": f"COGNITIVE: {ind}",
	})
	elif isinstance(ind, dict):
	start = ind.get("start_ms", 0)
	end = ind.get("end_ms", start + 200)
	ts1 = add_ts(start)
	ts2 = add_ts(end)
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": f"COGNITIVE: {ind.get('type', 'marker')} — {ind.get('description', '')}",
	})

	# Overall score
	score = cog.get("score", 0)
	if score > 0:
	duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
	ts1 = add_ts(0)
	ts2 = add_ts(duration_ms)
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": f"Cognitive Load Score: {score}/100",
	})

	tiers.append({
	"tier_id": "Cognitive_Load",
	"type_ref": "cognitive-lt",
	"annotator": "morpheme_boundary",
	"annotations": annotations,
	})


	def _build_connected_speech_tier(profile, tiers, add_ts, ann_ctr):
	"""Tier 7: Connected speech processes (assimilation, elision, linking)."""
	cs = profile.get("connected_speech", {})
	annotations = []

	for process_type in ["assimilations", "elisions", "linkings", "reductions"]:
	events = cs.get(process_type, [])
	for ev in events:
	if isinstance(ev, dict):
	start = ev.get("start_ms", ev.get("position_ms", 0))
	end = ev.get("end_ms", start + 150)
	label = ev.get("label") or ev.get("type", process_type.rstrip("s"))
	context = ev.get("context", "")

	ts1 = add_ts(start)
	ts2 = add_ts(end)
	value = f"[{process_type.upper().rstrip('S')}] {label}"
	if context:
	value += f" \| {context}"
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": value,
	})

	# Fluency score
	fluency = cs.get("fluency_score", 0)
	if fluency > 0:
	duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
	ts1 = add_ts(0)
	ts2 = add_ts(duration_ms)
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": f"Fluency Score: {fluency}/100",
	})

	tiers.append({
	"tier_id": "Connected_Speech",
	"type_ref": "connected-speech-lt",
	"annotator": "connected_speech",
	"annotations": annotations,
	})


	def _build_cif_tier(profile, tiers, add_ts, ann_ctr):
	    """Tier 8: CIF (Contrastive Interference Field) scores.

	    Reads ``profile["cif_analysis"]`` and emits utterance-wide annotations:
	    first the overall CII with its severity, then one per entry of
	    ``dimensions`` whose value is a dict.
	    """
	    analysis = profile.get("cif_analysis", {})
	    rows = []

	    span_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

	    cii = analysis.get("overall_cii", 0)
	    level = analysis.get("overall_severity", "unknown")

	    first, last = add_ts(0), add_ts(span_ms)
	    rows.append({
	        "id": _ann_id(ann_ctr),
	        "ts1": first, "ts2": last,
	        "value": f"CII={cii:.3f} [{level}]",
	    })

	    # Per-dimension CII
	    for axis, details in analysis.get("dimensions", {}).items():
	        if not isinstance(details, dict):
	            continue
	        axis_cii = details.get("cii", 0)
	        first, last = add_ts(0), add_ts(span_ms)
	        rows.append({
	            "id": _ann_id(ann_ctr),
	            "ts1": first, "ts2": last,
	            "value": f"CIF-{axis}: {axis_cii:.3f}",
	        })

	    tier = {
	        "tier_id": "CIF_Score",
	        "type_ref": "cif-lt",
	        "annotator": "cif_model",
	        "annotations": rows,
	    }
	    tiers.append(tier)


	def _build_emotion_tier(profile, tiers, add_ts, ann_ctr):
	"""Tier 9: Emotional analysis annotations."""
	annotations = []
	duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000

	# SpeechBrain emotion
	ai = profile.get("ai_classification", {})
	sb = ai.get("speechbrain", {})
	if sb:
	emotion = sb.get("emotion", {})
	if emotion:
	label = emotion.get("label", "unknown")
	conf = emotion.get("confidence", 0)
	ts1 = add_ts(0)
	ts2 = add_ts(duration_ms)
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": f"SpeechBrain: {label} ({conf:.2f})",
	})

	# auDeep emotion
	audeep = profile.get("audeep", {})
	if audeep:
	primary = audeep.get("primary_emotion", "neutral")
	conf = audeep.get("emotion_confidence", 0)
	v = audeep.get("valence", 0)
	a = audeep.get("arousal", 0)
	d = audeep.get("dominance", 0)
	ts1 = add_ts(0)
	ts2 = add_ts(duration_ms)
	annotations.append({
	"id": _ann_id(ann_ctr),
	"ts1": ts1, "ts2": ts2,
	"value": f"auDeep: {primary} ({conf:.2f}) \| V={v:.2f} A={a:.2f} D={d:.2f}",
	})

	tiers.append({
	"tier_id": "Emotion",
	"type_ref": "emotion-lt",
	"annotator": "speechbrain+audeep",
	"annotations": annotations,
	})


	def _build_metadata_tier(profile, tiers, add_ts, ann_ctr,
	                         speaker_id, student_name, language, l1_language, session_id):
	    """Tier 10: Session metadata.

	    Emits a single utterance-wide annotation summarising the speaker, the
	    L1 → L2 language pair, the session ID, and
	    ``profile["processing_time_ms"]``.

	    Args:
	        profile: Pipeline output dict.
	        tiers: List that receives the finished tier dict.
	        add_ts: Callable registering a millisecond time point and returning
	            its time-slot ID.
	        ann_ctr: Shared one-element annotation counter.
	        speaker_id: Speaker identifier; also used as the tier participant.
	        student_name: Display name shown next to the speaker ID.
	        language: Target (L2) language code.
	        l1_language: L1 language code.
	        session_id: Session identifier.
	    """
	    # Utterance length in ms; 5 s is assumed when the transcription has none.
	    duration_ms = profile.get("transcription", {}).get("duration_seconds", 5) * 1000
	    processing_ms = profile.get("processing_time_ms", 0)

	    ts1 = add_ts(0)
	    ts2 = add_ts(duration_ms)

	    meta_str = (
	        f"Speaker: {speaker_id} ({student_name}) | "
	        f"L1: {l1_language} → L2: {language} | "
	        f"Session: {session_id} | "
	        f"Processing: {processing_ms:.0f}ms"
	    )

	    tiers.append({
	        "tier_id": "Speaker_Metadata",
	        "type_ref": "metadata-lt",
	        "participant": speaker_id,
	        "annotations": [{
	            "id": _ann_id(ann_ctr),
	            "ts1": ts1, "ts2": ts2,
	            "value": meta_str,
	        }],
	    })


	# ── Public API ───────────────────────────────────────────────────────────

	def export_eaf(
	    profile: dict[str, Any],
	    audio_path: Path | str,
	    output_path: Path | str | None = None,
	    speaker_id: str = "anonymous",
	    student_name: str = "Student",
	    language: str = "en",
	    l1_language: str = "bho",
	    session_id: str | None = None,
	) -> tuple[str, Path]:
	    """Export analysis results to ELAN .eaf format.

	    Builds the document with ``generate_eaf`` and writes it as UTF-8.

	    Args:
	        profile: Full pipeline output dict.
	        audio_path: Path to source audio.
	        output_path: Where to save the .eaf file. If None, saves next to
	            the audio using the audio file's name with an ``.eaf`` suffix.
	        speaker_id: Speaker identifier.
	        student_name: Display name.
	        language: Target language.
	        l1_language: L1 language code.
	        session_id: Session identifier.

	    Returns:
	        Tuple of (eaf_xml_string, output_path).
	    """
	    audio_path = Path(audio_path)
	    if output_path is None:
	        output_path = audio_path.with_suffix(".eaf")
	    else:
	        output_path = Path(output_path)

	    eaf_xml = generate_eaf(
	        profile=profile,
	        audio_path=audio_path,
	        speaker_id=speaker_id,
	        student_name=student_name,
	        language=language,
	        l1_language=l1_language,
	        session_id=session_id,
	    )

	    # Create the destination directory so a fresh output location does not
	    # make the write fail.
	    output_path.parent.mkdir(parents=True, exist_ok=True)
	    output_path.write_text(eaf_xml, encoding="utf-8")
	    logger.info("ELAN export saved to %s", output_path)

	    return eaf_xml, output_path