| """ |
| app.py - VoiceNote AI v2.1 |
| Graceful DeepL fallback: when DeepL quota exhausted or fails, |
| Swedish text is sent directly to Scaleway LLM instead. |
| """ |
|
|
| import json |
| import logging |
| import datetime |
| import spaces |
| import gradio as gr |
|
|
| from config import Config |
| from gdpr_filter import apply_gdpr_filter |
| from models import WhisperASR, DeepLTranslator, MistralClient |
| from vips_classifier import classify_all |
| from utils import calculate_wer, format_vips_output, save_evaluation |
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# The ASR model is instantiated once, at import time. The DeepL and Mistral
# client slots start out unset and are filled in on demand by _get_clients().
asr_model = WhisperASR()
deepl_client = mistral_client = None
|
|
def _get_clients():
    """Return ``(deepl_client, mistral_client)``, creating each one on first use.

    A failure while constructing ``DeepLTranslator`` is logged as a warning
    and leaves the DeepL slot as ``None``; construction is attempted again on
    the next call. A failure while constructing ``MistralClient`` is not
    caught here and propagates to the caller.
    """
    global deepl_client, mistral_client

    if deepl_client is None:
        try:
            translator = DeepLTranslator()
        except Exception as exc:
            # Best effort: callers treat a None translator as "use fallback".
            logger.warning(f"DeepL client init failed: {exc}")
        else:
            deepl_client = translator

    if mistral_client is None:
        mistral_client = MistralClient()

    return deepl_client, mistral_client
|
|
|
|
def _make_json(transcription, wer, zero, few, cot):
    """Write the pipeline results to a new JSON file and return its path.

    Args:
        transcription: Text stored under ``"transcription"``.
        wer: Value stored under ``"wer"``.
        zero: Value stored under ``vips_results["zero_shot"]``.
        few: Value stored under ``vips_results["few_shot"]``.
        cot: Value stored under ``vips_results["chain_of_thought"]``.

    Returns:
        Path of the JSON file, created in the system temporary directory
        with a ``voicenote_<timestamp>_`` prefix and a ``.json`` suffix.
    """
    import tempfile

    # Read the clock once so the file name and the stored timestamp agree.
    now = datetime.datetime.now()
    data = {
        "timestamp": now.isoformat(),
        "system": "VoiceNote AI v2.1",
        "transcription": transcription,
        "wer": wer,
        "vips_results": {"zero_shot": zero, "few_shot": few, "chain_of_thought": cot},
    }

    # A name made only of a second-resolution timestamp is shared by every
    # call within that second, so concurrent requests would overwrite each
    # other's file. NamedTemporaryFile appends a random component and creates
    # the file exclusively; delete=False keeps it on disk for the download.
    with tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        prefix=f"voicenote_{now.strftime('%Y%m%d_%H%M%S')}_",
        suffix=".json",
        delete=False,
    ) as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    return f.name
|
|
|
|
@spaces.GPU
def run_pipeline_audio(audio, reference_text):
    """Transcribe ``audio`` and hand the text to the shared pipeline.

    Returns the 7-tuple produced by ``_run_common``. When transcription
    raises, or yields nothing but whitespace, the first slot carries a
    message and the remaining six slots are empty strings.
    """
    blanks = ("",) * 6
    try:
        swedish_text = asr_model.transcribe(audio)
        # Evaluated inside the try so a non-string result is reported as an
        # ASR failure as well.
        is_blank = not swedish_text or not swedish_text.strip()
    except Exception as exc:
        logger.exception("ASR failed")
        return (f"[FEL ASR]: {exc}",) + blanks

    if is_blank:
        return ("Transkriptionen ar tom.",) + blanks
    return _run_common(swedish_text, reference_text)
|
|
|
|
def run_pipeline_text(text_input, reference_text):
    """Run the shared pipeline on typed or pasted text.

    Missing or whitespace-only input short-circuits with a message in the
    first slot of the 7-tuple; otherwise the stripped text is passed on to
    ``_run_common`` together with ``reference_text``.
    """
    cleaned = text_input.strip() if text_input else ""
    if not cleaned:
        return ("Ingen text angiven.",) + ("",) * 6
    return _run_common(cleaned, reference_text)
|
|
|
|
def _run_common(swedish_text, reference_text):
    """Anonymise, translate (with fallback), score WER and classify the text.

    Args:
        swedish_text: Swedish input text, transcribed or typed.
        reference_text: Optional reference text; when it is non-blank a WER
            line is computed against ``swedish_text``.

    Returns:
        7-tuple ``(swedish_text, anonymized text, translation or fallback
        notice, WER display, zero-shot, few-shot, chain-of-thought)``.
        Client-initialisation and LLM failures are reported inside the tuple
        instead of being raised; errors from the GDPR filter or the WER
        calculation are not caught here.
    """
    logger.info("Running GDPR filter...")
    anonymized_sv = apply_gdpr_filter(swedish_text)

    try:
        translator, llm = _get_clients()
    except Exception as exc:
        logger.exception("Client init failed")
        return (swedish_text, anonymized_sv, f"[FEL]: {exc}", "", "", "", "")

    logger.info("Running DeepL (with fallback)...")
    # Start from the fallback input; only a successful translation replaces it.
    llm_input = anonymized_sv
    if translator is None:
        logger.warning("DeepL unavailable - using Swedish text for LLM")
        translation_display = (
            "[DeepL ej tillganglig - skickar svensk text direkt till LLM]\n\n"
            + anonymized_sv
        )
    else:
        try:
            translated = translator.translate(anonymized_sv)
        except Exception as exc:
            logger.warning(f"DeepL failed ({exc}) - falling back to Swedish")
            translation_display = f"[DeepL FALLBACK: {str(exc)[:80]}]\n\n[Skickar svensk text direkt till LLM:]\n\n{anonymized_sv}"
        else:
            translation_display = llm_input = translated
            logger.info("DeepL translation OK")

    # WER is only shown when a non-blank reference was supplied.
    wer_display = ""
    reference = reference_text.strip() if reference_text else ""
    if reference:
        wer_display = f"WER: {calculate_wer(reference, swedish_text):.1f}%"

    logger.info("Running Scaleway LLM...")
    try:
        all_results = classify_all(llm_input, llm)
        logger.info("Scaleway classification complete")
    except Exception as exc:
        logger.exception("LLM failed")
        failure = f"[FEL LLM]: {exc}"
        # The same error text fills all three strategy columns.
        return (swedish_text, anonymized_sv, translation_display, wer_display) + (failure,) * 3

    zero_text, few_text, cot_text = (
        format_vips_output(all_results[key])
        for key in ("zero_shot", "few_shot", "chain_of_thought")
    )

    logger.info("Returning results to UI")
    return (
        swedish_text,
        anonymized_sv,
        translation_display,
        wer_display,
        zero_text,
        few_text,
        cot_text,
    )
|
|
|
|
def run_pipeline(audio, text_input, reference_text):
    """Dispatch to the audio pipeline when audio is supplied, else to the text pipeline."""
    if audio is None:
        return run_pipeline_text(text_input, reference_text)
    return run_pipeline_audio(audio, reference_text)
|
|
|
|
# Answer options for the prompt-strategy comparison questions.
PROMPT_CHOICES = ["Zero-shot", "Few-shot", "Chain-of-Thought"]

# NASA-TLX rating options "1".."7", kept as strings for the radio widgets.
NASA_SCALE_STR = [str(step) for step in range(1, 8)]


# Stylesheet handed to demo.launch(); the class names below are the ones used
# as elem_classes in the layout.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@300;400;500;600&display=swap');
* { font-family: 'DM Sans', sans-serif !important; }
.gradio-container { background: #f0f4f8 !important; max-width: 1400px !important; margin: 0 auto; }
.header-banner {
background: linear-gradient(135deg, #1a5276 0%, #2980b9 100%);
border-radius: 16px; padding: 32px 40px; margin-bottom: 8px;
}
.header-banner h1 { color: white !important; font-size: 2rem !important; font-weight: 600 !important; margin: 0 0 6px 0 !important; }
.header-banner p { color: rgba(255,255,255,0.85) !important; font-size: 0.9rem !important; margin: 0 !important; }
.section-card { background: white; border-radius: 14px; padding: 28px; margin-bottom: 16px; border: 1px solid #e8ecf0; }
.section-label {
font-size: 0.7rem !important; font-weight: 600 !important;
letter-spacing: 0.12em !important; text-transform: uppercase !important;
color: #2980b9 !important; margin-bottom: 16px !important;
}
.vips-col-zero { border-top: 3px solid #e74c3c !important; border-radius: 10px; padding: 16px; }
.vips-col-few { border-top: 3px solid #2980b9 !important; border-radius: 10px; padding: 16px; }
.vips-col-cot { border-top: 3px solid #27ae60 !important; border-radius: 10px; padding: 16px; }
.gr-button-primary {
background: linear-gradient(135deg, #1a5276, #2980b9) !important;
border: none !important; border-radius: 10px !important; font-weight: 600 !important;
}
footer, .footer, .gradio-container > footer,
a[href*="gradio.app"], a[href*="/?view=api"] {
display: none !important;
visibility: hidden !important;
}
"""
|
|
|
|
# Page layout: header, input card, results card, evaluation card, plus the
# handler that runs the pipeline.
# NOTE(review): container nesting below is reconstructed from statement order;
# confirm it against the intended layout.
with gr.Blocks(title="VoiceNote AI") as demo:

    # Header banner.
    gr.HTML("""
<div class="header-banner">
<h1>VoiceNote AI</h1>
<p>VIPS-journalgenerering | Whisper KBLab -> GDPR -> DeepL (fallback: SV) -> Scaleway</p>
</div>
""")

    # --- Input card: audio or text, optional WER reference, run button ---
    with gr.Group(elem_classes="section-card"):
        gr.Markdown("##### INMATNING", elem_classes="section-label")
        with gr.Row(equal_height=True):
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Ljud",
                scale=1,
            )
            text_input = gr.Textbox(
                label="Eller text",
                lines=5,
                scale=1,
                placeholder="Klistra in patientsamtalet har...",
            )
        with gr.Row():
            reference_input = gr.Textbox(
                label="Referenstext for WER (valfritt)",
                lines=2,
                scale=3,
            )
            process_btn = gr.Button(
                "Generera journalanteckning",
                variant="primary",
                size="lg",
                scale=1,
            )

    # --- Results card: WER, intermediate pipeline text, three VIPS columns ---
    with gr.Group(elem_classes="section-card"):
        gr.Markdown("##### RESULTAT", elem_classes="section-label")

        wer_out = gr.Textbox(label="Word Error Rate", interactive=False)

        with gr.Accordion("Pipeline-detaljer", open=False):
            with gr.Row():
                transcription_out = gr.Textbox(
                    label="Transkription (SV)",
                    lines=5,
                    interactive=True,
                )
                anonymized_out = gr.Textbox(
                    label="Anonymiserad (SV)",
                    lines=5,
                    interactive=False,
                )
                translated_out = gr.Textbox(
                    label="Oversatt (EN) eller fallback",
                    lines=5,
                    interactive=False,
                )

        gr.Markdown("##### VIPS - TRE PROMPTSTRATEGIER", elem_classes="section-label")
        with gr.Row():
            with gr.Column(elem_classes="vips-col-zero"):
                gr.HTML("<h4>Zero-shot</h4>")
                zero_out = gr.Textbox(label="", lines=10, interactive=True)
            with gr.Column(elem_classes="vips-col-few"):
                gr.HTML("<h4>Few-shot</h4>")
                few_out = gr.Textbox(label="", lines=10, interactive=True)
            with gr.Column(elem_classes="vips-col-cot"):
                gr.HTML("<h4>Chain-of-Thought</h4>")
                cot_out = gr.Textbox(label="", lines=10, interactive=True)

    # --- Evaluation card: strategy comparison, NASA-TLX, save/clear ---
    with gr.Group(elem_classes="section-card"):
        gr.Markdown("##### UTVARDERING", elem_classes="section-label")
        gr.Markdown("**Del 1 - Jamforelse av promptstrategier**")
        with gr.Row():
            with gr.Column():
                eval_complete = gr.Radio(
                    choices=PROMPT_CHOICES,
                    label="1. Mest fullstandig?",
                )
                eval_hallucination = gr.Radio(
                    choices=PROMPT_CHOICES,
                    label="2. Undvek bast att hitta pa information?",
                )
            with gr.Column():
                eval_structure = gr.Radio(
                    choices=PROMPT_CHOICES,
                    label="3. Foljde VIPS-strukturen bast?",
                )
                eval_clinical = gr.Radio(
                    choices=PROMPT_CHOICES,
                    label="4. Skulle valjas i klinisk praktik?",
                )
        eval_comment = gr.Textbox(label="5. Kommentar", lines=3)

        gr.Markdown("---\n**Del 2 - NASA-TLX** | *1 = lag, 7 = hog*")
        with gr.Row():
            with gr.Column():
                tlx_mental = gr.Radio(choices=NASA_SCALE_STR, label="Mental")
                tlx_physical = gr.Radio(choices=NASA_SCALE_STR, label="Fysisk")
                tlx_temporal = gr.Radio(choices=NASA_SCALE_STR, label="Tidsbrist")
            with gr.Column():
                tlx_performance = gr.Radio(choices=NASA_SCALE_STR, label="Prestation")
                tlx_effort = gr.Radio(choices=NASA_SCALE_STR, label="Anstrangning")
                tlx_frustration = gr.Radio(choices=NASA_SCALE_STR, label="Frustration")

        with gr.Row():
            save_btn = gr.Button(
                "Spara utvardering & ladda ner",
                variant="primary",
                scale=2,
            )
            clear_btn = gr.Button(
                "Rensa all data fran granssnittet",
                variant="secondary",
                scale=1,
            )

        eval_status = gr.Textbox(
            label="",
            interactive=False,
            placeholder="Status visas har efter sparning...",
        )

        download_file = gr.File(
            label="Komplett resultat + utvardering (JSON) - klicka for att ladda ner",
            interactive=False,
        )

    # The outputs are listed in the order of the 7-tuple returned by
    # run_pipeline.
    process_btn.click(
        fn=run_pipeline,
        inputs=[audio_input, text_input, reference_input],
        outputs=[
            transcription_out,
            anonymized_out,
            translated_out,
            wer_out,
            zero_out,
            few_out,
            cot_out,
        ],
    )
|
|
def on_save(c, h, s, cl, cm, m, p, t, pe, e, f,
            transcription, wer, zero, few, cot):
    """Combine pipeline results and evaluation answers into one downloadable file.

    Args:
        c, h, s, cl: Part 1 answers (most complete, least hallucination,
            best structure, clinical choice). At least one must be set.
        cm: Free-text comment; stored as ``""`` when empty.
        m, p, t, pe, e, f: NASA-TLX ratings (mental, physical, temporal,
            performance, effort, frustration). Falsy ratings are stored as
            given but left out of the average.
        transcription, wer, zero, few, cot: Pipeline outputs to embed.

    Returns:
        ``(status_message, file_path)``. ``file_path`` is ``None`` when no
        Part 1 question was answered; otherwise it is the path of a newly
        created JSON file in the system temporary directory.
    """
    import tempfile

    if not any([c, h, s, cl]):
        return "Fyll i minst ett svar i Del 1.", None

    # Only answered ratings take part in the average.
    filled = [int(x) for x in [m, p, t, pe, e, f] if x]

    # Read the clock once so the entry and the file name carry the same time.
    now = datetime.datetime.now()

    # NOTE(review): `transcription` is wired to the "Transkription (SV)" box,
    # which shows the text from before the GDPR filter - confirm that
    # persisting it here and via save_evaluation is intended.
    entry = {
        "timestamp": now.isoformat(),
        "system": "VoiceNote AI v2.1",
        "pipeline_results": {
            "transcription": transcription,
            "wer": wer,
            "vips": {
                "zero_shot": zero,
                "few_shot": few,
                "chain_of_thought": cot,
            },
        },
        "prompt_evaluation": {
            "most_complete": c,
            "least_hallucination": h,
            "best_structure": s,
            "clinical_choice": cl,
            "comment": cm or "",
        },
        "nasa_tlx": {
            "mental": m,
            "physical": p,
            "temporal": t,
            "performance": pe,
            "effort": e,
            "frustration": f,
            "total_avg": round(sum(filled) / len(filled), 2) if filled else None,
        },
    }

    # Server-side persistence is best effort; the download is still produced.
    try:
        save_evaluation(entry)
    except Exception as ex:
        logger.warning(f"Server save failed: {ex}")

    # A name made only of a second-resolution timestamp is shared by every
    # user saving within that second, so one evaluation could overwrite (and
    # be served in place of) another. NamedTemporaryFile appends a random
    # component and creates the file exclusively; delete=False keeps it on
    # disk so it can be downloaded.
    with tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        prefix=f"voicenote_utvardering_{now.strftime('%Y%m%d_%H%M%S')}_",
        suffix=".json",
        delete=False,
    ) as fh:
        json.dump(entry, fh, ensure_ascii=False, indent=2)
    filename = fh.name

    return "Utvardering sparad! Fil klar for nedladdning nedan.", filename
|
|
# Inputs follow on_save's parameter order: the five Part 1 fields, the six
# NASA-TLX ratings, then the pipeline outputs to embed in the file.
save_btn.click(
    on_save,
    inputs=[
        eval_complete,
        eval_hallucination,
        eval_structure,
        eval_clinical,
        eval_comment,
        tlx_mental,
        tlx_physical,
        tlx_temporal,
        tlx_performance,
        tlx_effort,
        tlx_frustration,
        transcription_out,
        wer_out,
        zero_out,
        few_out,
        cot_out,
    ],
    outputs=[eval_status, download_file],
)
|
|
def clear_all():
    """Return reset values for every component wired to the clear button.

    The 23-element tuple is positional and has to line up with the
    ``outputs`` list given to ``clear_btn.click``. This function only returns
    values; it does not delete any file or stored evaluation.
    """
    return (
        (None,)          # audio input
        + ("",) * 9      # text, reference, three pipeline boxes, WER, three VIPS boxes
        + (None,) * 4    # Part 1 radios
        + ("",)          # comment
        + (None,) * 6    # NASA-TLX radios
        + ("All data rensad fran granssnittet.", None)  # status, download file
    )
|
|
# Output order must match the tuple returned by clear_all, element for element.
clear_btn.click(
    clear_all,
    inputs=[],
    outputs=[
        # inputs
        audio_input,
        text_input,
        reference_input,
        # pipeline results
        transcription_out,
        anonymized_out,
        translated_out,
        wer_out,
        zero_out,
        few_out,
        cot_out,
        # Part 1 evaluation
        eval_complete,
        eval_hallucination,
        eval_structure,
        eval_clinical,
        eval_comment,
        # NASA-TLX
        tlx_mental,
        tlx_physical,
        tlx_temporal,
        tlx_performance,
        tlx_effort,
        tlx_frustration,
        # status and download
        eval_status,
        download_file,
    ],
)
|
|
|
|
# Launch the app only when this file is executed as a script, passing the
# stylesheet defined in custom_css.
# NOTE(review): whether launch() accepts `css` depends on the installed
# Gradio version - confirm against the pinned version.
if __name__ == "__main__":
    demo.launch(css=custom_css)