""" VoiceNote AI - Utilities WER calculation, VIPS formatting, evaluation export """ import json import logging from datetime import datetime from config import Config logger = logging.getLogger(__name__) def calculate_wer(reference: str, hypothesis: str) -> float: """ Calculate Word Error Rate (WER). WER = (Substitutions + Deletions + Insertions) / N where N = total words in reference. Args: reference: Ground truth transcription hypothesis: Whisper output Returns: WER as percentage (0–100). Returns 0.0 if reference is empty. """ if not reference or not reference.strip(): return 0.0 ref_words = reference.strip().lower().split() hyp_words = hypothesis.strip().lower().split() # Dynamic programming edit distance d = [[0] * (len(hyp_words) + 1) for _ in range(len(ref_words) + 1)] for i in range(len(ref_words) + 1): d[i][0] = i for j in range(len(hyp_words) + 1): d[0][j] = j for i in range(1, len(ref_words) + 1): for j in range(1, len(hyp_words) + 1): if ref_words[i - 1] == hyp_words[j - 1]: d[i][j] = d[i - 1][j - 1] else: d[i][j] = 1 + min(d[i - 1][j - 1], d[i][j - 1], d[i - 1][j]) wer = (d[len(ref_words)][len(hyp_words)] / len(ref_words)) * 100 return round(wer, 2) def format_vips_output(vips: dict) -> str: """Format a VIPS dict as readable Swedish clinical text.""" labels = { "V": "V (Välbefinnande)", "I": "I (Integritet)", "P": "P (Prevention)", "S": "S (Säkerhet)", } lines = [f"{labels.get(k, k)}: {vips.get(k, 'Ingen relevant information.')}" for k in ["V", "I", "P", "S"]] return "\n".join(lines) def save_evaluation(entry: dict) -> str: """ Append an evaluation entry to the JSONL file. Args: entry: Dict with evaluation answers Returns: Status message """ entry["timestamp"] = datetime.utcnow().isoformat() + "Z" try: with open(Config.EVAL_FILE, "a", encoding="utf-8") as f: f.write(json.dumps(entry, ensure_ascii=False) + "\n") logger.info(f"Evaluation saved: {entry}") return "✅ Utvärdering sparad!" except Exception as e: logger.error(f"Failed to save evaluation: {e}") return f"⚠️ Kunde inte spara: {e}" def format_timestamp() -> str: return datetime.now().strftime("%H:%M:%S")