""" app.py - VoiceNote AI v2.1 Graceful DeepL fallback: when DeepL quota exhausted or fails, Swedish text is sent directly to Scaleway LLM instead. """ import json import logging import datetime import spaces import gradio as gr from config import Config from gdpr_filter import apply_gdpr_filter from models import WhisperASR, DeepLTranslator, MistralClient from vips_classifier import classify_all from utils import calculate_wer, format_vips_output, save_evaluation logger = logging.getLogger(__name__) asr_model = WhisperASR() deepl_client = None mistral_client = None def _get_clients(): global deepl_client, mistral_client if deepl_client is None: try: deepl_client = DeepLTranslator() except Exception as e: logger.warning(f"DeepL client init failed: {e}") deepl_client = None if mistral_client is None: mistral_client = MistralClient() return deepl_client, mistral_client def _make_json(transcription, wer, zero, few, cot): timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") filename = f"/tmp/voicenote_{timestamp}.json" data = { "timestamp": datetime.datetime.now().isoformat(), "system": "VoiceNote AI v2.1", "transcription": transcription, "wer": wer, "vips_results": {"zero_shot": zero, "few_shot": few, "chain_of_thought": cot} } with open(filename, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2) return filename @spaces.GPU def run_pipeline_audio(audio, reference_text): try: swedish_text = asr_model.transcribe(audio) if not swedish_text or not swedish_text.strip(): return ("Transkriptionen ar tom.", "", "", "", "", "", "") except Exception as e: logger.exception("ASR failed") return (f"[FEL ASR]: {e}", "", "", "", "", "", "") return _run_common(swedish_text, reference_text) def run_pipeline_text(text_input, reference_text): if not text_input or not text_input.strip(): return ("Ingen text angiven.", "", "", "", "", "", "") return _run_common(text_input.strip(), reference_text) def _run_common(swedish_text, reference_text): logger.info("Running GDPR filter...") anonymized_sv = apply_gdpr_filter(swedish_text) # Get clients try: dl, mc = _get_clients() except Exception as e: logger.exception("Client init failed") return (swedish_text, anonymized_sv, f"[FEL]: {e}", "", "", "", "") # Try DeepL with graceful fallback to Swedish logger.info("Running DeepL (with fallback)...") if dl is None: # DeepL client never initialized logger.warning("DeepL unavailable - using Swedish text for LLM") english_text_display = "[DeepL ej tillganglig - skickar svensk text direkt till LLM]\n\n" + anonymized_sv text_for_llm = anonymized_sv else: try: english_text = dl.translate(anonymized_sv) english_text_display = english_text text_for_llm = english_text logger.info("DeepL translation OK") except Exception as e: logger.warning(f"DeepL failed ({e}) - falling back to Swedish") english_text_display = f"[DeepL FALLBACK: {str(e)[:80]}]\n\n[Skickar svensk text direkt till LLM:]\n\n{anonymized_sv}" text_for_llm = anonymized_sv # Calculate WER if reference provided wer_display = "" if reference_text and reference_text.strip(): wer = calculate_wer(reference_text.strip(), swedish_text) wer_display = f"WER: {wer:.1f}%" # Send to Scaleway LLM (text_for_llm is either English or Swedish) logger.info("Running Scaleway LLM...") try: all_results = classify_all(text_for_llm, mc) logger.info("Scaleway classification complete") except Exception as e: logger.exception("LLM failed") err = f"[FEL LLM]: {e}" return (swedish_text, anonymized_sv, english_text_display, wer_display, err, err, err) zero_text = format_vips_output(all_results["zero_shot"]) few_text = format_vips_output(all_results["few_shot"]) cot_text = format_vips_output(all_results["chain_of_thought"]) logger.info("Returning results to UI") return (swedish_text, anonymized_sv, english_text_display, wer_display, zero_text, few_text, cot_text) def run_pipeline(audio, text_input, reference_text): if audio is not None: return run_pipeline_audio(audio, reference_text) return run_pipeline_text(text_input, reference_text) PROMPT_CHOICES = ["Zero-shot", "Few-shot", "Chain-of-Thought"] NASA_SCALE_STR = ["1", "2", "3", "4", "5", "6", "7"] custom_css = """ @import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@300;400;500;600&display=swap'); * { font-family: 'DM Sans', sans-serif !important; } .gradio-container { background: #f0f4f8 !important; max-width: 1400px !important; margin: 0 auto; } .header-banner { background: linear-gradient(135deg, #1a5276 0%, #2980b9 100%); border-radius: 16px; padding: 32px 40px; margin-bottom: 8px; } .header-banner h1 { color: white !important; font-size: 2rem !important; font-weight: 600 !important; margin: 0 0 6px 0 !important; } .header-banner p { color: rgba(255,255,255,0.85) !important; font-size: 0.9rem !important; margin: 0 !important; } .section-card { background: white; border-radius: 14px; padding: 28px; margin-bottom: 16px; border: 1px solid #e8ecf0; } .section-label { font-size: 0.7rem !important; font-weight: 600 !important; letter-spacing: 0.12em !important; text-transform: uppercase !important; color: #2980b9 !important; margin-bottom: 16px !important; } .vips-col-zero { border-top: 3px solid #e74c3c !important; border-radius: 10px; padding: 16px; } .vips-col-few { border-top: 3px solid #2980b9 !important; border-radius: 10px; padding: 16px; } .vips-col-cot { border-top: 3px solid #27ae60 !important; border-radius: 10px; padding: 16px; } .gr-button-primary { background: linear-gradient(135deg, #1a5276, #2980b9) !important; border: none !important; border-radius: 10px !important; font-weight: 600 !important; } footer, .footer, .gradio-container > footer, a[href*="gradio.app"], a[href*="/?view=api"] { display: none !important; visibility: hidden !important; } """ with gr.Blocks(title="VoiceNote AI") as demo: gr.HTML("""

VoiceNote AI

VIPS-journalgenerering | Whisper KBLab -> GDPR -> DeepL (fallback: SV) -> Scaleway

""") with gr.Group(elem_classes="section-card"): gr.Markdown("##### INMATNING", elem_classes="section-label") with gr.Row(equal_height=True): audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Ljud", scale=1) text_input = gr.Textbox(label="Eller text", lines=5, scale=1, placeholder="Klistra in patientsamtalet har...") with gr.Row(): reference_input = gr.Textbox(label="Referenstext for WER (valfritt)", lines=2, scale=3) process_btn = gr.Button("Generera journalanteckning", variant="primary", size="lg", scale=1) with gr.Group(elem_classes="section-card"): gr.Markdown("##### RESULTAT", elem_classes="section-label") wer_out = gr.Textbox(label="Word Error Rate", interactive=False) with gr.Accordion("Pipeline-detaljer", open=False): with gr.Row(): transcription_out = gr.Textbox(label="Transkription (SV)", lines=5, interactive=True) anonymized_out = gr.Textbox(label="Anonymiserad (SV)", lines=5, interactive=False) translated_out = gr.Textbox(label="Oversatt (EN) eller fallback", lines=5, interactive=False) gr.Markdown("##### VIPS - TRE PROMPTSTRATEGIER", elem_classes="section-label") with gr.Row(): with gr.Column(elem_classes="vips-col-zero"): gr.HTML("

Zero-shot

") zero_out = gr.Textbox(label="", lines=10, interactive=True) with gr.Column(elem_classes="vips-col-few"): gr.HTML("

Few-shot

") few_out = gr.Textbox(label="", lines=10, interactive=True) with gr.Column(elem_classes="vips-col-cot"): gr.HTML("

Chain-of-Thought

") cot_out = gr.Textbox(label="", lines=10, interactive=True) with gr.Group(elem_classes="section-card"): gr.Markdown("##### UTVARDERING", elem_classes="section-label") gr.Markdown("**Del 1 - Jamforelse av promptstrategier**") with gr.Row(): with gr.Column(): eval_complete = gr.Radio(choices=PROMPT_CHOICES, label="1. Mest fullstandig?") eval_hallucination = gr.Radio(choices=PROMPT_CHOICES, label="2. Undvek bast att hitta pa information?") with gr.Column(): eval_structure = gr.Radio(choices=PROMPT_CHOICES, label="3. Foljde VIPS-strukturen bast?") eval_clinical = gr.Radio(choices=PROMPT_CHOICES, label="4. Skulle valjas i klinisk praktik?") eval_comment = gr.Textbox(label="5. Kommentar", lines=3) gr.Markdown("---\n**Del 2 - NASA-TLX** | *1 = lag, 7 = hog*") with gr.Row(): with gr.Column(): tlx_mental = gr.Radio(choices=NASA_SCALE_STR, label="Mental") tlx_physical = gr.Radio(choices=NASA_SCALE_STR, label="Fysisk") tlx_temporal = gr.Radio(choices=NASA_SCALE_STR, label="Tidsbrist") with gr.Column(): tlx_performance = gr.Radio(choices=NASA_SCALE_STR, label="Prestation") tlx_effort = gr.Radio(choices=NASA_SCALE_STR, label="Anstrangning") tlx_frustration = gr.Radio(choices=NASA_SCALE_STR, label="Frustration") with gr.Row(): save_btn = gr.Button("Spara utvardering & ladda ner", variant="primary", scale=2) clear_btn = gr.Button("Rensa all data fran granssnittet", variant="secondary", scale=1) eval_status = gr.Textbox(label="", interactive=False, placeholder="Status visas har efter sparning...") download_file = gr.File( label="Komplett resultat + utvardering (JSON) - klicka for att ladda ner", interactive=False, ) # Event handlers process_btn.click( fn=run_pipeline, inputs=[audio_input, text_input, reference_input], outputs=[transcription_out, anonymized_out, translated_out, wer_out, zero_out, few_out, cot_out], ) def on_save(c, h, s, cl, cm, m, p, t, pe, e, f, transcription, wer, zero, few, cot): """Combine pipeline results + evaluation into ONE downloadable file.""" if not any([c, h, s, cl]): return "Fyll i minst ett svar i Del 1.", None filled = [int(x) for x in [m, p, t, pe, e, f] if x] entry = { "timestamp": datetime.datetime.now().isoformat(), "system": "VoiceNote AI v2.1", "pipeline_results": { "transcription": transcription, "wer": wer, "vips": { "zero_shot": zero, "few_shot": few, "chain_of_thought": cot, }, }, "prompt_evaluation": { "most_complete": c, "least_hallucination": h, "best_structure": s, "clinical_choice": cl, "comment": cm or "", }, "nasa_tlx": { "mental": m, "physical": p, "temporal": t, "performance": pe, "effort": e, "frustration": f, "total_avg": round(sum(filled)/len(filled), 2) if filled else None, }, } try: save_evaluation(entry) except Exception as ex: logger.warning(f"Server save failed: {ex}") timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") filename = f"/tmp/voicenote_utvardering_{timestamp}.json" with open(filename, "w", encoding="utf-8") as fh: json.dump(entry, fh, ensure_ascii=False, indent=2) return "Utvardering sparad! Fil klar for nedladdning nedan.", filename save_btn.click( fn=on_save, inputs=[eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment, tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration, transcription_out, wer_out, zero_out, few_out, cot_out], outputs=[eval_status, download_file], ) def clear_all(): """Reset all UI fields - no data remains in interface or memory.""" return ( None, "", "", "", "", "", "", "", "", "", None, None, None, None, "", None, None, None, None, None, None, "All data rensad fran granssnittet.", None, ) clear_btn.click( fn=clear_all, inputs=[], outputs=[ audio_input, text_input, reference_input, transcription_out, anonymized_out, translated_out, wer_out, zero_out, few_out, cot_out, eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment, tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration, eval_status, download_file, ], ) if __name__ == "__main__": demo.launch(css=custom_css)