x / utils.py
SsebaA's picture
Update utils.py
07462e8 verified
"""
VoiceNote AI - Utilities
WER calculation, VIPS formatting, evaluation export
"""
import json
import logging
from datetime import datetime, timezone

from config import Config
logger = logging.getLogger(__name__)
def calculate_wer(reference: str, hypothesis: str) -> float:
    """
    Calculate Word Error Rate (WER).

    WER = (Substitutions + Deletions + Insertions) / N
    where N = total words in reference.

    Both texts are lower-cased and split on whitespace before comparison,
    so the metric is case-insensitive and ignores extra spacing.

    Args:
        reference: Ground truth transcription
        hypothesis: Transcription to score. ``None`` or an empty string is
            treated as an empty transcript (every reference word deleted).

    Returns:
        WER as a percentage rounded to two decimals. Returns 0.0 if the
        reference is empty or blank. The value can exceed 100 when the
        hypothesis contains more inserted words than the reference has
        words.
    """
    if not reference or not reference.strip():
        return 0.0

    ref_words = reference.strip().lower().split()
    # Tolerate a missing hypothesis the same way a missing reference is
    # tolerated above, instead of failing on ``None.strip()``.
    hyp_words = (hypothesis or "").strip().lower().split()

    # Word-level edit distance. Only the previous row of the DP table is
    # needed to compute the next one, so keep two rows instead of the full
    # (len(ref) + 1) x (len(hyp) + 1) matrix.
    previous_row = list(range(len(hyp_words) + 1))
    for i, ref_word in enumerate(ref_words, start=1):
        # Column 0: turning i reference words into nothing costs i deletions.
        current_row = [i]
        for j, hyp_word in enumerate(hyp_words, start=1):
            if ref_word == hyp_word:
                current_row.append(previous_row[j - 1])
            else:
                current_row.append(
                    1 + min(
                        previous_row[j - 1],  # substitution
                        current_row[j - 1],   # insertion
                        previous_row[j],      # deletion
                    )
                )
        previous_row = current_row

    distance = previous_row[-1]
    wer = (distance / len(ref_words)) * 100
    return round(wer, 2)
def format_vips_output(vips: dict) -> str:
    """
    Render a VIPS dict as readable Swedish clinical text.

    One line is produced per category, always in the order V, I, P, S.
    A category whose key is absent from ``vips`` gets a fixed placeholder
    sentence; a key that is present is rendered with its value as-is.

    Args:
        vips: Dict that may contain the keys "V", "I", "P" and "S"

    Returns:
        Four "label: value" lines joined with newlines
    """
    placeholder = "Ingen relevant information."
    # (key, display label) pairs in output order.
    categories = (
        ("V", "V (Välbefinnande)"),
        ("I", "I (Integritet)"),
        ("P", "P (Prevention)"),
        ("S", "S (Säkerhet)"),
    )

    rendered = []
    for key, label in categories:
        value = vips.get(key, placeholder)
        rendered.append(f"{label}: {value}")
    return "\n".join(rendered)
def save_evaluation(entry: dict) -> str:
    """
    Append an evaluation entry to the JSONL file at ``Config.EVAL_FILE``.

    A ``timestamp`` key holding the current UTC time (ISO 8601 with a
    trailing "Z") is written into ``entry`` itself, so the caller's dict is
    modified. The entry is then serialised as one JSON line, keeping
    non-ASCII characters unescaped.

    Args:
        entry: Dict with evaluation answers

    Returns:
        Status message: a success text, or a warning text that includes the
        error when serialising or writing fails. Such failures are logged
        and reported through the return value, not raised.
    """
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop tzinfo so isoformat() still yields the naive form that
    # the "Z" suffix is appended to (no "+00:00" offset in the output).
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    entry["timestamp"] = now_utc.isoformat() + "Z"
    try:
        # Serialise before opening so a non-serialisable entry does not
        # create or touch the evaluation file.
        line = json.dumps(entry, ensure_ascii=False) + "\n"
        with open(Config.EVAL_FILE, "a", encoding="utf-8") as f:
            f.write(line)
        logger.info("Evaluation saved: %s", entry)
        return "✅ Utvärdering sparad!"
    except Exception as e:
        # Boundary for the caller-facing status message: report instead of
        # raising, but keep the traceback in the log.
        logger.exception("Failed to save evaluation: %s", e)
        return f"⚠️ Kunde inte spara: {e}"
def format_timestamp() -> str:
    """Return the current time from ``datetime.now()`` as ``HH:MM:SS``."""
    current = datetime.now()
    return format(current, "%H:%M:%S")