Spaces:

SsebaA
/

x

Running on Zero

App Files Files Community

SsebaA committed on Mar 27

Commit

33cafa8

verified ·

1 Parent(s): 806edf7

Update utils.py

Browse files

Files changed (1) hide show

utils.py +172 -230

utils.py CHANGED Viewed

@@ -1,264 +1,206 @@
 """
 VoiceNote AI - Utilities
-=========================
-WER calculator and HTML formatting functions.
 """
-import os
-import csv
 import logging
-from typing import Optional, Dict
 from datetime import datetime
-from jiwer import wer as compute_wer
 from config import Config
 logger = logging.getLogger(__name__)
-# ══════════════════════════════════════════════════════════
-# WER CALCULATOR
-# ══════════════════════════════════════════════════════════
-class WERCalculator:
-    """Word Error Rate calculator with validation"""
-    @staticmethod
-    def calculate(reference: str, hypothesis: str) -> Optional[float]:
-        """
-        Calculate WER score.
-        Returns percentage (0-100) or None if invalid input.
-        """
-        if not reference or not reference.strip():
-            return None
-        if not hypothesis or not hypothesis.strip():
-            logger.warning("Empty hypothesis for WER calculation")
-            return 100.0  # All words are errors
-        try:
-            score = compute_wer(
-                reference.lower().strip(),
-                hypothesis.lower().strip()
-            )
-            percentage = round(score * 100, 1)
-            logger.info(f"WER calculated: {percentage}%")
-            return percentage
-        except Exception as e:
-            logger.error(f"WER calculation error: {e}")
-            return None
-    @staticmethod
-    def get_quality_label(wer: Optional[float]) -> str:
-        """Get quality label for WER score"""
-        if wer is None:
-            return "N/A"
-        if wer < Config.WER_EXCELLENT:
-            return "Utmärkt"
-        elif wer < Config.WER_GOOD:
-            return "Bra"
-        elif wer < Config.WER_ACCEPTABLE:
-            return "Godkänd"
-        else:
-            return "Behöver förbättras"
-# ══════════════════════════════════════════════════════════
-# HTML FORMATTERS
-# ══════════════════════════════════════════════════════════
-def formatera_vips_html(vips: dict) -> str:
-    """Format VIPS dictionary as colored HTML"""
-    colors = {
-        'V': ('#10B981', '#ECFDF5'),  # Green
-        'I': ('#F59E0B', '#FFFBEB'),  # Amber
-        'P': ('#3B82F6', '#EFF6FF'),  # Blue
-        'S': ('#EF4444', '#FEF2F2'),  # Red
-    }
-    categories = {
-        'V': 'Välbefinnande',
-        'I': 'Integritet',
-        'P': 'Prevention',
-        'S': 'Säkerhet',
-    }
-    html_parts = []
-    for key in ['V', 'I', 'P', 'S']:
-        fg, bg = colors[key]
-        cat = categories[key]
-        text = vips.get(key, "Ingen relevant information.")
-        html_parts.append(f"""
-        <div style='background:{bg};border-left:4px solid {fg};padding:12px 16px;margin-bottom:10px;border-radius:8px;'>
-            <div style='color:{fg};font-weight:700;font-size:13px;margin-bottom:4px;'>{key} — {cat}</div>
-            <div style='color:#1F2937;font-size:14px;line-height:1.6;'>{text}</div>
-        </div>
-        """)
-    return "".join(html_parts)
-def wer_badge(wer_poang: Optional[float]) -> str:
-    """Create WER badge HTML"""
-    if wer_poang is None:
-        return ""
-    if wer_poang < Config.WER_EXCELLENT:
-        color, bg = "#059669", "#ECFDF5"
-        label = "Utmärkt ✅"
-    elif wer_poang < Config.WER_GOOD:
-        color, bg = "#0369A1", "#EFF6FF"
-        label = "Bra ✅"
-    elif wer_poang < Config.WER_ACCEPTABLE:
-        color, bg = "#D97706", "#FFFBEB"
-        label = "Godkänd ⚠️"
-    else:
-        color, bg = "#DC2626", "#FEF2F2"
-        label = "Behöver förbättras ❌"
-    return f"""
-    <div style='background:{bg};border:2px solid {color}55;border-radius:14px;
-                padding:20px;margin-bottom:12px;text-align:center;'>
-        <div style='font-size:42px;font-weight:900;color:{color};'>{wer_poang:.1f}%</div>
-        <div style='color:{color};font-size:16px;font-weight:700;margin-top:4px;'>WER: {label}</div>
-    </div>"""
-def formatera_historik_html(historik: list) -> str:
-    """Format history list as HTML"""
-    if not historik:
-        return "<p style='color:#64748B;text-align:center;padding:40px;'>Ingen historik ännu.</p>"
-    items = []
-    for post in reversed(historik[-5:]):  # Last 5 entries
-        tid = post.get('tid', 'N/A')
-        wer = post.get('wer_poang')
-        wer_text = f"{wer:.1f}%" if wer is not None else "N/A"
-        technique = post.get('prompt_technique', 'unknown')
-        technique_emoji = "📚" if technique == "few_shot" else "🧠"
-        items.append(f"""
-        <div style='background:#F8FAFC;border:1.5px solid #E2E8F0;border-radius:10px;
-                    padding:12px;margin-bottom:8px;'>
-            <div style='display:flex;justify-content:space-between;align-items:center;'>
-                <span style='color:#475569;font-weight:600;'>{technique_emoji} {tid}</span>
-                <span style='color:#64748B;font-size:13px;'>WER: {wer_text}</span>
-            </div>
-        </div>
-        """)
-    return "".join(items)
-# ══════════════════════════════════════════════════════════
-# FILE EXPORT FUNCTIONS
-# ══════════════════════════════════════════════════════════
-def spara_nedladdning(innehall: str) -> Optional[str]:
-    """Save text file for download"""
-    # Check if input is valid
-    if not innehall or not isinstance(innehall, str):
-        logger.warning("Invalid input for file download")
-        return None
-    try:
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        path = os.path.join(Config.TEMP_DIR, f"vips_anteckning_{timestamp}.txt")
-        with open(path, "w", encoding="utf-8") as f:
-            f.write(innehall)
-        logger.info(f"Saved text file: {path}")
-        return path
-    except Exception as e:
-        logger.error(f"Error saving text file: {e}")
-        return None
-def exportera_csv(alla_resultat: list) -> Optional[str]:
-    """Export all results as CSV"""
-    if not alla_resultat:
-        return None
-    try:
-        path = os.path.join(Config.TEMP_DIR, "voicenote_resultat.csv")
-        with open(path, "w", newline="", encoding="utf-8") as f:
-            fieldnames = ["tid", "prompt_technique", "asr_tid", "llm_tid", "total_tid", "wer_poang", "transkription", "referens", "vips"]
-            writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
-            writer.writeheader()
-            writer.writerows(alla_resultat)
-        logger.info(f"Exported CSV: {path}")
-        return path
-    except Exception as e:
-        logger.error(f"CSV export error: {e}")
-        return None
-# ══════════════════════════════════════════════════════════
-# USABILITY CALCULATORS
-# ══════════════════════════════════════════════════════════
-def berakna_sus(*svar) -> str:
-    """Calculate SUS score and return HTML"""
-    try:
-        pl = []
-        for i, val in enumerate(svar):
-            try:
-                v = int(val)
-            except (TypeError, ValueError):
-                return "<div style='color:#DC2626;padding:14px;background:#FEF2F2;border-radius:10px;font-weight:600;'>⚠️ Fyll i alla 10 frågor.</div>"
-            pl.append(v - 1 if i % 2 == 0 else 5 - v)
-        total = sum(pl) * 2.5
-        # Determine color and label
-        if total >= Config.SUS_GOOD:
-            f, b = "#059669", "#ECFDF5"
-        elif total >= Config.SUS_PASS:
-            f, b = "#D97706", "#FFFBEB"
-        else:
-            f, b = "#DC2626", "#FEF2F2"
-        e = ("Utmärkt ✅" if total >= Config.SUS_EXCELLENT else
-             "Bra ✅" if total >= Config.SUS_GOOD else
-             "Godkänd ⚠️" if total >= Config.SUS_PASS else
-             "Underkänd ❌")
-        return f"""<div style='background:{b};border:2px solid {f}55;border-radius:14px;padding:28px;text-align:center;'>
-            <div style='font-size:60px;font-weight:900;color:{f};'>{total:.1f}</div>
-            <div style='color:{f};font-size:18px;font-weight:700;margin-top:6px;'>{e}</div>
-        </div>"""
-    except Exception as ex:
-        logger.error(f"SUS calculation error: {ex}")
-        return "<div style='color:#DC2626;'>Fel vid beräkning</div>"
-def berakna_nasa(*svar) -> str:
-    """Calculate NASA-TLX score and return HTML"""
     try:
-        values = []
-        for val in svar:
-            try:
-                v = int(val)
-                values.append(v)
-            except (TypeError, ValueError):
-                return "<div style='color:#DC2626;padding:14px;background:#FEF2F2;border-radius:10px;font-weight:600;'>⚠️ Fyll i alla 6 dimensioner.</div>"
-        total = sum(values)
-        if total <= Config.NASA_LOW:
-            f, b = "#059669", "#ECFDF5"
-            e = "Låg belastning ✅"
-        elif total <= Config.NASA_MEDIUM:
-            f, b = "#D97706", "#FFFBEB"
-            e = "Medel belastning ⚠️"
-        else:
-            f, b = "#DC2626", "#FEF2F2"
-            e = "Hög belastning ❌"
-        return f"""<div style='background:{b};border:2px solid {f}55;border-radius:14px;padding:28px;text-align:center;'>
-            <div style='font-size:60px;font-weight:900;color:{f};'>{total}</div>
-            <div style='color:{f};font-size:14px;margin-top:4px;'>/ 120 poäng</div>
-            <div style='color:{f};font-size:18px;font-weight:700;margin-top:10px;'>{e}</div>
-        </div>"""
-    except Exception as ex:
-        logger.error(f"NASA-TLX calculation error: {ex}")
-        return "<div style='color:#DC2626;'>Fel vid beräkning</div>"

 """
 VoiceNote AI - Utilities
+Helper functions for WER calculation, formatting, and export
 """
 import logging
 from datetime import datetime
 from config import Config
 logger = logging.getLogger(__name__)
def calculate_wer(reference: str, hypothesis: str) -> float:
    """
    Calculate the Word Error Rate (WER) between two texts.

    WER = (S + D + I) / N, where S, D and I are the numbers of word
    substitutions, deletions and insertions needed to turn the reference
    into the hypothesis, and N is the number of words in the reference.
    Words are obtained by whitespace splitting and compared exactly
    (the comparison is case-sensitive).

    Args:
        reference: Ground truth text.
        hypothesis: Predicted text. ``None`` or an empty string is treated
            as "no words recognised" instead of raising.

    Returns:
        WER as a percentage rounded to two decimals. The value can exceed
        100 when the hypothesis contains many extra words. Returns 0.0 when
        the reference is empty or whitespace-only, because no rate can be
        computed without reference words.
    """
    if not reference or not reference.strip():
        return 0.0

    ref_words = reference.split()
    # A missing hypothesis counts as an empty one: every reference word
    # becomes a deletion.
    hyp_words = (hypothesis or "").split()

    # Word-level edit distance. Each row depends only on the row above it,
    # so two rows are enough; the full matrix is never needed.
    previous_row = list(range(len(hyp_words) + 1))
    for i, ref_word in enumerate(ref_words, start=1):
        current_row = [i]
        for j, hyp_word in enumerate(hyp_words, start=1):
            if ref_word == hyp_word:
                current_row.append(previous_row[j - 1])
            else:
                substitution = previous_row[j - 1]
                insertion = current_row[j - 1]
                deletion = previous_row[j]
                current_row.append(1 + min(substitution, insertion, deletion))
        previous_row = current_row

    wer = (previous_row[-1] / len(ref_words)) * 100
    return round(wer, 2)
def format_vips_output(vips: dict) -> str:
    """
    Format a VIPS dictionary as readable text.

    Emits one "<key>: <value>" line per category, always in the fixed
    order V, I, P, S. Categories missing from the dictionary are skipped
    and any other keys are ignored.

    Args:
        vips: Dictionary keyed by "V", "I", "P", "S". ``None`` or an empty
            dictionary is accepted and produces an empty string.

    Returns:
        The formatted lines joined by newlines ("" when nothing to show).
    """
    # The classification step may yield nothing; render that as empty text
    # instead of failing on the membership test below.
    if not vips:
        return ""
    return "\n".join(
        f"{category}: {vips[category]}"
        for category in ("V", "I", "P", "S")
        if category in vips
    )
def format_timestamp() -> str:
    """Return the current local wall-clock time formatted as HH:MM:SS."""
    current_time = datetime.now()
    return f"{current_time:%H:%M:%S}"
def format_report(
    transcription: str,
    vips: dict,
    reference: str,
    wer: float,
    asr_time: float,
    llm_time: float,
    total_time: float
) -> str:
    """
    Build the complete plain-text report for display or download.

    Args:
        transcription: Transcribed text.
        vips: VIPS classifications, rendered with format_vips_output.
        reference: Reference text used for WER; may be empty or None.
        wer: Word Error Rate as a percentage, or None when not computed.
        asr_time: ASR processing time in seconds.
        llm_time: LLM processing time in seconds.
        total_time: Total processing time in seconds.

    Returns:
        The formatted report text. The WER field shows "N/A" when no
        reference text was supplied or no score is available, and
        "<value>%" otherwise (including a genuine 0.0 score).
    """
    technique_name = (
        "Few-shot Prompting"
        if Config.PROMPT_TECHNIQUE == "few_shot"
        else "Chain-of-Thought Prompting"
    )

    # A WER of 0.0 is a valid (perfect) score and must be shown as such.
    # Only the absence of a score or of a reference text means "not
    # applicable"; in that case no percent sign is appended.
    has_reference = bool(reference and reference.strip())
    wer_text = f"{wer}%" if (wer is not None and has_reference) else "N/A"

    report = f"""VoiceNote AI — VIPS Journalanteckning
Tid:       {format_timestamp()}
ASR Model: OpenAI Whisper ({Config.ASR_MODEL_NAME})
Prompt:    {technique_name} ({Config.PROMPT_TECHNIQUE})
GDPR:      Dubbel anonymisering · Mistral AI EU-servrar
ASR: {asr_time:.2f}s  |  LLM: {llm_time:.2f}s  |  Total: {total_time:.2f}s  |  WER: {wer_text}
Transkription:
{transcription}
VIPS-Dokumentation:
{format_vips_output(vips)}
Referenstext (för WER):
{reference if reference else 'Ingen referenstext angiven'}
"""
    return report
def export_to_csv(data: dict) -> str:
    """
    Export one result as CSV text (header line plus one data row).

    The first six columns (tid, technique, asr_tid, llm_tid, total_tid,
    wer) are quoted only when their value requires it. The six free-text
    columns (transkription, V, I, P, S, referens) are always quoted.
    Embedded double quotes are escaped by doubling them, so the output
    stays parseable even when the text contains quotes, commas or
    newlines.

    Args:
        data: Dictionary with result data. Missing keys become empty
            fields; values are converted with str().

    Returns:
        The CSV header and row as a single string, each terminated by "\\n".
    """
    # Imported locally so this function does not depend on module-level
    # imports that may not be present in the file.
    import csv
    import io

    def render(keys, quoting):
        # Let the csv module handle quoting/escaping for one group of columns.
        buffer = io.StringIO()
        writer = csv.writer(buffer, quoting=quoting, lineterminator="\n")
        writer.writerow([str(data.get(key, "")) for key in keys])
        return buffer.getvalue()[:-1]  # drop the line terminator

    csv_header = "Tid,Prompt_Technique,ASR_Tid,LLM_Tid,Total_Tid,WER,Transkription,V,I,P,S,Referens\n"
    plain_part = render(
        ("tid", "technique", "asr_tid", "llm_tid", "total_tid", "wer"),
        csv.QUOTE_MINIMAL,
    )
    text_part = render(
        ("transkription", "V", "I", "P", "S", "referens"),
        csv.QUOTE_ALL,
    )
    return f"{csv_header}{plain_part},{text_part}\n"
def spara_nedladdning(
    transcription: str,
    vips: dict,
    reference: str,
    wer: float,
    asr_time: float,
    llm_time: float,
    total_time: float
) -> str:
    """
    Save the formatted report to a temporary file offered for download.

    Args:
        transcription: Transcribed text; must be a non-empty string.
        vips: VIPS classifications.
        reference: Reference text.
        wer: Word Error Rate.
        asr_time: ASR time.
        llm_time: LLM time.
        total_time: Total time.

    Returns:
        Path to the saved file (a file path, not the report text), or None
        when the input is invalid or the report could not be written.
    """
    # Imported locally so this function does not depend on module-level
    # imports that may not be present in the file.
    import tempfile

    # Validate inputs
    if not transcription or not isinstance(transcription, str):
        logger.warning("Invalid input for file download")
        return None

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    try:
        # Build the report inside the guarded section so a formatting
        # failure yields None like any other export failure.
        report = format_report(
            transcription=transcription,
            vips=vips,
            reference=reference,
            wer=wer,
            asr_time=asr_time,
            llm_time=llm_time,
            total_time=total_time
        )
        # A unique file in the platform temp directory: two exports in the
        # same second can no longer overwrite each other, which matters
        # because the report contains clinical notes.
        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            prefix=f"voicenote_ai_{timestamp}_",
            suffix=".txt",
            delete=False,
        ) as f:
            f.write(report)
            filename = f.name
        logger.info(f"Report saved to {filename}")
        return filename
    except Exception as e:
        # Best-effort export: report the problem and signal failure to the
        # caller instead of propagating.
        logger.error(f"Failed to save report: {e}")
        return None