Spaces:

SsebaA
/

x

Running on Zero

File size: 2,487 Bytes

ec5faa7
 
07462e8
ec5faa7
 
07462e8
ec5faa7
 
 
 
 
 
 
33cafa8
 
07462e8
 
 
 
 
33cafa8
07462e8
 
 
33cafa8
07462e8
33cafa8
 
 
07462e8
 
 
 
 
33cafa8
 
 
 
 
07462e8
33cafa8
 
07462e8
 
33cafa8
07462e8
 
33cafa8
 
ec5faa7
 
33cafa8
07462e8
 
a723498
 
 
07462e8
a723498
07462e8
 
 
a723498
 
07462e8
33cafa8
07462e8
ec5faa7
33cafa8
07462e8
33cafa8
 
07462e8
33cafa8
07462e8
ec5faa7
07462e8
 
 
 
33cafa8
07462e8

"""
VoiceNote AI - Utilities
WER calculation, VIPS formatting, evaluation export
"""

import json
import logging
from datetime import datetime
from config import Config

logger = logging.getLogger(__name__)


def calculate_wer(reference: str, hypothesis: str) -> float:
    """
    Compute the Word Error Rate (WER) of a hypothesis against a reference.

    Both texts are stripped, lower-cased and split on whitespace. The
    word-level edit distance (substitutions + deletions + insertions) is
    then divided by the number of reference words.

    Args:
        reference: Ground truth transcription
        hypothesis: Transcription being scored

    Returns:
        WER as a percentage rounded to two decimals. The value can exceed
        100 when the hypothesis contains many inserted words. Returns 0.0
        if the reference is empty or only whitespace.
    """
    if not (reference and reference.strip()):
        return 0.0

    expected = reference.strip().lower().split()
    actual = hypothesis.strip().lower().split()

    # Only two rows of the edit-distance table are needed at any time:
    # `previous` is the row for the prior reference word, `current` the one
    # being filled in. Row 0 is the cost of inserting each hypothesis prefix.
    previous = list(range(len(actual) + 1))
    for row, ref_word in enumerate(expected, start=1):
        # Column 0: deleting the first `row` reference words.
        current = [row]
        for col, hyp_word in enumerate(actual, start=1):
            if ref_word == hyp_word:
                cost = previous[col - 1]
            else:
                cost = 1 + min(
                    previous[col - 1],  # substitution
                    current[col - 1],   # insertion
                    previous[col],      # deletion
                )
            current.append(cost)
        previous = current

    distance = previous[-1]
    wer = (distance / len(expected)) * 100
    return round(wer, 2)


def format_vips_output(vips: dict) -> str:
    """
    Format a VIPS dict as readable Swedish clinical text.

    Produces one line per section, always in the order V, I, P, S, each as
    ``"<label>: <value>"``. A section whose key is missing, whose value is
    ``None``, or whose value is an empty/whitespace-only string is rendered
    with the placeholder "Ingen relevant information." so the output never
    shows a literal "None" or a blank section.

    Args:
        vips: Mapping with the keys "V", "I", "P" and "S". ``None`` is
            treated as an empty mapping.

    Returns:
        The four formatted lines joined with newlines.
    """
    labels = {
        "V": "V (Välbefinnande)",
        "I": "I (Integritet)",
        "P": "P (Prevention)",
        "S": "S (Säkerhet)",
    }
    placeholder = "Ingen relevant information."
    sections = vips or {}

    lines = []
    for key, label in labels.items():
        value = sections.get(key)
        # A present-but-empty section is treated the same as a missing one.
        if value is None or (isinstance(value, str) and not value.strip()):
            value = placeholder
        lines.append(f"{label}: {value}")
    return "\n".join(lines)


def save_evaluation(entry: dict) -> str:
    """
    Append an evaluation entry to the JSONL file at ``Config.EVAL_FILE``.

    A UTC ``"timestamp"`` key (ISO 8601 with a trailing "Z") is written into
    ``entry`` itself, so the caller's dict is modified in place. The entry
    is then serialized as one JSON line and appended to the file.

    Args:
        entry: Dict with evaluation answers; its values must be
            JSON-serializable.

    Returns:
        Status message: a success text, or a failure text that includes the
        error. Errors raised while serializing or writing are caught and
        logged rather than propagated.
    """
    # Local import: the module only imports `datetime` from the datetime package.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop tzinfo so isoformat() keeps the original "...Z" layout
    # instead of appending "+00:00".
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    entry["timestamp"] = now_utc.isoformat() + "Z"

    try:
        # Serialize before opening so a non-serializable entry never touches
        # the file.
        line = json.dumps(entry, ensure_ascii=False) + "\n"
        with open(Config.EVAL_FILE, "a", encoding="utf-8") as f:
            f.write(line)
    except Exception as e:
        # Best-effort at the UI boundary: report the problem to the user
        # instead of crashing, but keep the traceback in the log.
        logger.exception("Failed to save evaluation: %s", e)
        return f"⚠️ Kunde inte spara: {e}"

    logger.info("Evaluation saved: %s", entry)
    return "✅ Utvärdering sparad!"


def format_timestamp() -> str:
    """Return the current time from ``datetime.now()`` as ``HH:MM:SS``."""
    current = datetime.now()
    return format(current, "%H:%M:%S")