| """ |
| VoiceNote AI - Utilities |
| WER calculation, VIPS formatting, evaluation export |
| """ |
|
|
| import json |
| import logging |
| from datetime import datetime |
| from config import Config |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
def calculate_wer(reference: str, hypothesis: str) -> float:
    """
    Calculate Word Error Rate (WER).

    WER = (Substitutions + Deletions + Insertions) / N
    where N = total words in reference.

    Both texts are lower-cased and split on whitespace before comparison,
    so the metric is case-insensitive and ignores extra spacing.

    Args:
        reference: Ground truth transcription
        hypothesis: Transcription to score. ``None`` or an empty string is
            treated as an empty transcription (every reference word counts
            as a deletion).

    Returns:
        WER as a percentage rounded to two decimals. The value is 0 for a
        perfect match and can exceed 100 when the hypothesis contains more
        extra words than the reference has words. Returns 0.0 if reference
        is empty or whitespace-only.
    """
    if not reference or not reference.strip():
        return 0.0

    ref_words = reference.lower().split()
    # A missing hypothesis is scored as "nothing was transcribed" instead of
    # raising AttributeError on None.
    hyp_words = (hypothesis or "").lower().split()

    # Word-level edit distance computed with two rolling rows: only the
    # previous row is ever needed, so the full matrix is not materialized.
    prev_row = list(range(len(hyp_words) + 1))
    for i, ref_word in enumerate(ref_words, start=1):
        curr_row = [i]  # distance from i reference words to an empty hypothesis
        for j, hyp_word in enumerate(hyp_words, start=1):
            if ref_word == hyp_word:
                cost = prev_row[j - 1]
            else:
                cost = 1 + min(
                    prev_row[j - 1],  # substitution
                    curr_row[j - 1],  # insertion
                    prev_row[j],      # deletion
                )
            curr_row.append(cost)
        prev_row = curr_row

    wer = (prev_row[-1] / len(ref_words)) * 100
    return round(wer, 2)
|
|
|
|
def format_vips_output(vips: dict) -> str:
    """
    Render a VIPS dict as readable Swedish clinical text.

    One line is produced per VIPS key, always in the order V, I, P, S.
    A key that is absent from ``vips`` is rendered with a Swedish
    "no relevant information" placeholder.

    Args:
        vips: Mapping from the keys "V", "I", "P", "S" to their text

    Returns:
        Four "<heading>: <text>" lines joined with newlines
    """
    fallback = "Ingen relevant information."
    headings = (
        ("V", "V (Välbefinnande)"),
        ("I", "I (Integritet)"),
        ("P", "P (Prevention)"),
        ("S", "S (Säkerhet)"),
    )

    rendered = []
    for key, heading in headings:
        rendered.append(f"{heading}: {vips.get(key, fallback)}")
    return "\n".join(rendered)
|
|
|
|
def save_evaluation(entry: dict) -> str:
    """
    Append an evaluation entry to the JSONL file.

    A "timestamp" key holding the current UTC time (ISO 8601, "Z" suffix)
    is written into ``entry`` itself, so the caller's dict is modified.
    The entry is then serialized as one JSON line and appended to
    ``Config.EVAL_FILE``.

    Args:
        entry: Dict with evaluation answers

    Returns:
        Status message: a success text, or an error text that includes
        the exception message when serializing or writing failed. No
        exception is propagated to the caller.
    """
    # Function-scope import so this block does not depend on a change to the
    # file-level import list.
    from datetime import timezone

    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the stored string keeps the "...Z" shape (no "+00:00").
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    entry["timestamp"] = now_utc.isoformat() + "Z"

    try:
        # Serialize before opening so an unserializable entry never touches
        # (or creates) the evaluation file.
        line = json.dumps(entry, ensure_ascii=False) + "\n"
        with open(Config.EVAL_FILE, "a", encoding="utf-8") as f:
            f.write(line)
    except Exception as e:
        # Deliberate catch-all: the function reports failure through its
        # return value; the traceback is kept in the log.
        logger.exception("Failed to save evaluation: %s", e)
        return f"⚠️ Kunde inte spara: {e}"

    logger.info("Evaluation saved: %s", entry)
    return "✅ Utvärdering sparad!"
|
|
|
|
def format_timestamp() -> str:
    """Return the current local wall-clock time formatted as HH:MM:SS."""
    current = datetime.now()
    return f"{current:%H:%M:%S}"