Spaces:

SsebaA
/

x

Sleeping

App Files Files Community

SsebaA committed 7 days ago

Commit

0586e99

verified ·

1 Parent(s): 54eb47d

Update app.py

Browse files

Files changed (1) hide show

app.py +256 -207

app.py CHANGED Viewed

@@ -1,24 +1,22 @@
 import logging
 import gradio as gr
 from models import WhisperASR, MistralClient
-from gdpr_filter import GDPRFilter
-from vips_classifier import VIPSClassifier
-from config import Config
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# Initialize components
-whisper_model = WhisperASR()
-gdpr_filter = GDPRFilter()
-llm_client = MistralClient()
-vips_classifier = VIPSClassifier(llm_client)
 def format_vips_output(text) -> str:
-    """Format VIPS output, handling dict or string types."""
     if isinstance(text, dict):
         text = str(text)
@@ -28,219 +26,270 @@ def format_vips_output(text) -> str:
     return str(text).strip()
-def run_pipeline_audio(audio_input, reference_text=""):
-    """Process audio input through full pipeline."""
-    if audio_input is None:
-        return "❌ No audio input provided", "", "", "", ""
-    logger.info("Processing audio input...")
-    # Step 1: ASR
-    logger.info("Running Whisper ASR...")
-    transcript = whisper_model.transcribe(audio_input)
-    if not transcript:
-        return "❌ ASR failed - no output", "", "", "", ""
-    # Calculate WER if reference provided
-    wer_result = ""
-    if reference_text and reference_text.strip():
-        from models import calculate_wer
-        wer_value = calculate_wer(reference_text, transcript)
-        wer_result = f"WER: {wer_value:.2f}%"
-    return _run_common(transcript, wer_result)
 def run_pipeline_text(text_input):
-    """Process text input through pipeline (skip ASR)."""
     if not text_input or not text_input.strip():
-        return "❌ No text input provided", "", "", "", ""
-    logger.info("Processing text input (ASR skipped)...")
-    return _run_common(text_input.strip(), "ASR: Skipped")
-def _run_common(text_input, wer_info=""):
-    """Common pipeline: GDPR → VIPS Classification."""
-    # Step 2: GDPR Filter
     logger.info("Running GDPR filter...")
-    anonymized_text = gdpr_filter.apply_dual_layer_gdpr(text_input)
-    # Step 3: VIPS Classification (3 strategies)
     logger.info("Running Scaleway LLM...")
-    all_results = vips_classifier.classify_vips(anonymized_text)
-    # Format outputs
-    zero_text = format_vips_output(all_results.get("zero_shot", ""))
-    few_text = format_vips_output(all_results.get("few_shot", ""))
-    chain_text = format_vips_output(all_results.get("chain_of_thought", ""))
-    logger.info("Pipeline complete")
-    return (
-        f"✅ Transcription:\n{text_input}\n\n[{wer_info}]",
-        zero_text,
-        few_text,
-        chain_text,
-        f"Anonymized: {len(anonymized_text)} chars"
-    )
-def on_save_results(zero, few, chain):
-    """Save results to file."""
-    import json
-    from datetime import datetime
-    data = {
-        "timestamp": datetime.now().isoformat(),
-        "zero_shot": zero,
-        "few_shot": few,
-        "chain_of_thought": chain,
-    }
-    filename = f"{Config.APP_NAME}_v{Config.APP_VERSION}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
-    with open(filename, 'w', encoding='utf-8') as f:
-        json.dump(data, f, indent=2, ensure_ascii=False)
-    return f"✅ Saved to {filename}"
-# ============================================================================
-# GRADIO INTERFACE — 2+1 LAYOUT (2 columns on top, 1 full-width below)
-# ============================================================================
-with gr.Blocks(title=f"{Config.APP_NAME} v{Config.APP_VERSION}", theme=gr.themes.Soft()) as demo:
-    # Header
-    gr.Markdown(f"""
-    # {Config.APP_NAME}
-    **Automated VIPS Documentation from Swedish Patient-Nurse Conversations**
-    Pipeline: Whisper KBLab → GDPR Filter → Scaleway LLM (VIPS)
     """)
-    # ========== INPUTS ==========
-    with gr.Group():
-        gr.Markdown("### 📥 Input")
-        with gr.Tabs():
-            # Tab 1: Audio input
-            with gr.TabItem("🎤 Voice Recording"):
-                audio_input = gr.Audio(
-                    label="Record or upload audio",
-                    type="filepath",
-                    format="wav"
-                )
-                reference_text = gr.Textbox(
-                    label="Reference text (for WER calculation, optional)",
-                    lines=3,
-                    placeholder="Paste correct transcription here..."
-                )
-                submit_audio = gr.Button("🚀 Process Audio", variant="primary")
-            # Tab 2: Text input
-            with gr.TabItem("📝 Text Input"):
-                text_input = gr.Textbox(
-                    label="Or paste text directly (skips ASR)",
-                    lines=5,
-                    placeholder="Paste transcribed conversation here..."
-                )
-                submit_text = gr.Button("🚀 Process Text", variant="primary")
-    # ========== OUTPUTS ==========
-    gr.Markdown("### 📤 Output")
-    # Transcription box
-    transcript_box = gr.Textbox(
-        label="Transcription & Metrics",
-        lines=4,
-        interactive=False
-    )
-    # **2-COLUMN LAYOUT FOR ZERO-SHOT AND FEW-SHOT**
-    with gr.Row():
-        with gr.Column():
-            zero_shot_output = gr.Textbox(
-                label="Zero-shot",
-                lines=20,
-                interactive=True,
-                show_label=True
-            )
-        with gr.Column():
-            few_shot_output = gr.Textbox(
-                label="Few-shot",
-                lines=20,
-                interactive=True,
-                show_label=True
-            )
-    # **FULL-WIDTH LAYOUT FOR CHAIN-OF-THOUGHT**
-    chain_of_thought_output = gr.Textbox(
-        label="Chain-of-Thought",
-        lines=20,
-        interactive=True,
-        show_label=True
-    )
-    # Info box
-    info_box = gr.Textbox(
-        label="Info",
-        interactive=False
-    )
-    # ========== ACTIONS ==========
-    with gr.Row():
-        save_btn = gr.Button("💾 Save Results as JSON", variant="secondary")
-        clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
-    save_status = gr.Textbox(label="Status", interactive=False)
-    # ========== EVENT HANDLERS ==========
-    # Process audio
-    submit_audio.click(
-        fn=run_pipeline_audio,
-        inputs=[audio_input, reference_text],
-        outputs=[transcript_box, zero_shot_output, few_shot_output, chain_of_thought_output, info_box]
-    )
-    # Process text
-    submit_text.click(
-        fn=run_pipeline_text,
-        inputs=[text_input],
-        outputs=[transcript_box, zero_shot_output, few_shot_output, chain_of_thought_output, info_box]
     )
-    # Save results
     save_btn.click(
-        fn=on_save_results,
-        inputs=[zero_shot_output, few_shot_output, chain_of_thought_output],
-        outputs=[save_status]
     )
-    # Clear all
     clear_btn.click(
-        fn=lambda: ("", "", "", "", "", ""),
-        outputs=[audio_input, text_input, transcript_box, zero_shot_output,
-                few_shot_output, chain_of_thought_output]
     )
-    # Footer
-    gr.Markdown("""
-    ---
-    **⚠️ Disclaimer:** This system generates nursing documentation drafts only.
-    **Always review and approve** AI-generated notes before clinical use.
-    **Never rely on system output** for medical decision-making.
-    """)
 if __name__ == "__main__":
-    demo.launch(
-        share=False,
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )

+import json
 import logging
+import datetime
+import spaces
 import gradio as gr
+from config import Config, VIPS_CATEGORIES
+from gdpr_filter import apply_gdpr_filter
 from models import WhisperASR, MistralClient
+from vips_classifier import classify_all
+# Module-level logger.
+# NOTE(review): this commit removes the earlier logging.basicConfig(level=logging.INFO)
+# call; unless logging is configured elsewhere, the logger.info(...) messages
+# in this file are presumably not emitted — confirm.
+logger = logging.getLogger(__name__)
+# The ASR wrapper is constructed once, at import time.
+asr_model      = WhisperASR()
+# Placeholder for the LLM client; filled in on first use by _get_clients().
+mistral_client = None
 def format_vips_output(text) -> str:
     if isinstance(text, dict):
         text = str(text)
     return str(text).strip()
+def _get_clients():
+    """Return the shared MistralClient, constructing it on the first call."""
+    global mistral_client
+    if mistral_client is not None:
+        return mistral_client
+    mistral_client = MistralClient()
+    return mistral_client
+@spaces.GPU
+def run_pipeline_audio(audio):
+    """Transcribe an audio input and hand the text to the shared pipeline.
+
+    Args:
+        audio: Value forwarded unchanged to ``asr_model.transcribe``.
+
+    Returns:
+        A 5-tuple (transcription, anonymized text, zero-shot, few-shot,
+        chain-of-thought), one value per output component wired to the
+        process button. When the transcript is empty or ASR raises, the
+        message is placed in the first slot and the rest are empty strings.
+    """
+    try:
+        swedish_text = asr_model.transcribe(audio)
+        if not swedish_text or not swedish_text.strip():
+            # Five values, not six: the click handler has five outputs.
+            return ("Transkriptionen ar tom.", "", "", "", "")
+    except Exception as e:
+        # UI boundary: report the failure in the interface instead of raising.
+        logger.exception("ASR failed")
+        return (f"[FEL ASR]: {e}", "", "", "", "")
+    return _run_common(swedish_text)
 def run_pipeline_text(text_input):
+    """Run the shared pipeline on pasted text, skipping ASR.
+
+    Args:
+        text_input: Text from the input box; may be None or blank.
+
+    Returns:
+        A 5-tuple (transcription, anonymized text, zero-shot, few-shot,
+        chain-of-thought), one value per output component wired to the
+        process button. For missing or blank input the first slot carries
+        the message and the rest are empty strings.
+    """
     if not text_input or not text_input.strip():
+        # Five values, not six: the click handler has five outputs.
+        return ("Ingen text angiven.", "", "", "", "")
+    return _run_common(text_input.strip())
+def _run_common(swedish_text):
+    """Anonymize the text, then classify it with the three prompt strategies.
+
+    Args:
+        swedish_text: Transcribed or pasted conversation text.
+
+    Returns:
+        A 5-tuple (original text, anonymized text, zero-shot, few-shot,
+        chain-of-thought). Every path returns exactly five values so the
+        result always lines up with the five output components of the
+        process button. If the client cannot be created, the error message
+        goes in the zero-shot slot; if classification fails, it is repeated
+        in all three strategy slots.
+    """
     logger.info("Running GDPR filter...")
+    anonymized_sv = apply_gdpr_filter(swedish_text)
+    # Get clients
+    try:
+        mc = _get_clients()
+    except Exception as e:
+        logger.exception("Client init failed")
+        return (swedish_text, anonymized_sv, f"[FEL]: {e}", "", "")
+    # Send to Scaleway LLM
     logger.info("Running Scaleway LLM...")
+    try:
+        all_results = classify_all(anonymized_sv, mc)
+        logger.info("Scaleway classification complete")
+    except Exception as e:
+        logger.exception("LLM failed")
+        err = f"[FEL LLM]: {e}"
+        return (swedish_text, anonymized_sv, err, err, err)
+    zero_text = format_vips_output(all_results["zero_shot"])
+    few_text  = format_vips_output(all_results["few_shot"])
+    cot_text  = format_vips_output(all_results["chain_of_thought"])
+    logger.info("Returning results to UI")
+    return (swedish_text, anonymized_sv, zero_text, few_text, cot_text)
+def run_pipeline(audio, text_input):
+    """Use the audio pipeline when audio is supplied, otherwise the text one."""
+    if audio is None:
+        return run_pipeline_text(text_input)
+    return run_pipeline_audio(audio)
+# Answer options offered by the four Part 1 comparison questions.
+PROMPT_CHOICES = ["Zero-shot", "Few-shot", "Chain-of-Thought"]
+# NASA-TLX rating options 1-7, kept as strings for the Radio components;
+# on_save converts the selected values with int().
+NASA_SCALE_STR = ["1", "2", "3", "4", "5", "6", "7"]
+custom_css = """
+@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@300;400;500;600&display=swap');
+* { font-family: 'DM Sans', sans-serif !important; }
+.gradio-container { background: #f0f4f8 !important; max-width: 1400px !important; margin: 0 auto; }
+.header-banner {
+    background: linear-gradient(135deg, #1a5276 0%, #2980b9 100%);
+    border-radius: 16px; padding: 32px 40px; margin-bottom: 8px;
+}
+.header-banner h1 { color: white !important; font-size: 2rem !important; font-weight: 600 !important; margin: 0 0 6px 0 !important; }
+.header-banner p  { color: rgba(255,255,255,0.85) !important; font-size: 0.9rem !important; margin: 0 !important; }
+.section-card { background: white; border-radius: 14px; padding: 28px; margin-bottom: 16px; border: 1px solid #e8ecf0; }
+.section-label {
+    font-size: 0.7rem !important; font-weight: 600 !important;
+    letter-spacing: 0.12em !important; text-transform: uppercase !important;
+    color: #2980b9 !important; margin-bottom: 16px !important;
+}
+.vips-col-zero { border-top: 3px solid #e74c3c !important; border-radius: 10px; padding: 16px; }
+.vips-col-few  { border-top: 3px solid #2980b9 !important; border-radius: 10px; padding: 16px; }
+.vips-col-cot  { border-top: 3px solid #27ae60 !important; border-radius: 10px; padding: 16px; }
+.gr-button-primary {
+    background: linear-gradient(135deg, #1a5276, #2980b9) !important;
+    border: none !important; border-radius: 10px !important; font-weight: 600 !important;
+}
+footer, .footer, .gradio-container > footer,
+a[href*="gradio.app"], a[href*="/?view=api"] {
+    display: none !important;
+    visibility: hidden !important;
+}
+"""
+with gr.Blocks(title="VoiceNote AI") as demo:
+    gr.HTML(f"""
+    <div class="header-banner">
+      <h1>{Config.APP_NAME}</h1>
+      <p>VIPS-journalgenerering | Whisper KBLab -> GDPR -> Scaleway</p>
+    </div>
     """)
+    with gr.Group(elem_classes="section-card"):
+        gr.Markdown("##### INMATNING", elem_classes="section-label")
+        with gr.Row(equal_height=True):
+            audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath",
+                                   label="Ljud", scale=1)
+            text_input = gr.Textbox(label="Eller text", lines=5, scale=1,
+                                    placeholder="Klistra in patientsamtalet har...")
+        process_btn = gr.Button("Generera journalanteckning",
+                                variant="primary", size="lg")
+    with gr.Group(elem_classes="section-card"):
+        gr.Markdown("##### RESULTAT", elem_classes="section-label")
+        with gr.Accordion("Pipeline-detaljer", open=False):
+            with gr.Row():
+                transcription_out = gr.Textbox(label="Transkription (SV)",
+                                               lines=5, interactive=True)
+                anonymized_out = gr.Textbox(label="Anonymiserad (SV)",
+                                            lines=5, interactive=False)
+        gr.Markdown("##### VIPS - TRE PROMPTSTRATEGIER", elem_classes="section-label")
+        with gr.Row():
+            with gr.Column(elem_classes="vips-col-zero"):
+                gr.HTML("<h4>Zero-shot</h4>")
+                zero_out = gr.Textbox(label="", lines=10, interactive=True)
+            with gr.Column(elem_classes="vips-col-few"):
+                gr.HTML("<h4>Few-shot</h4>")
+                few_out = gr.Textbox(label="", lines=10, interactive=True)
+            with gr.Column(elem_classes="vips-col-cot"):
+                gr.HTML("<h4>Chain-of-Thought</h4>")
+                cot_out = gr.Textbox(label="", lines=10, interactive=True)
+    with gr.Group(elem_classes="section-card"):
+        gr.Markdown("##### UTVARDERING", elem_classes="section-label")
+        gr.Markdown("**Del 1 - Jamforelse av promptstrategier**")
+        with gr.Row():
+            with gr.Column():
+                eval_complete = gr.Radio(choices=PROMPT_CHOICES,
+                    label="1. Mest fullstandig?")
+                eval_hallucination = gr.Radio(choices=PROMPT_CHOICES,
+                    label="2. Undvek bast att hitta pa information?")
+            with gr.Column():
+                eval_structure = gr.Radio(choices=PROMPT_CHOICES,
+                    label="3. Foljde VIPS-strukturen bast?")
+                eval_clinical = gr.Radio(choices=PROMPT_CHOICES,
+                    label="4. Skulle valjas i klinisk praktik?")
+        eval_comment = gr.Textbox(label="5. Kommentar", lines=3)
+        gr.Markdown("---\n**Del 2 - NASA-TLX** | *1 = lag, 7 = hog*")
+        with gr.Row():
+            with gr.Column():
+                tlx_mental = gr.Radio(choices=NASA_SCALE_STR, label="Mental")
+                tlx_physical = gr.Radio(choices=NASA_SCALE_STR, label="Fysisk")
+                tlx_temporal = gr.Radio(choices=NASA_SCALE_STR, label="Tidsbrist")
+            with gr.Column():
+                tlx_performance = gr.Radio(choices=NASA_SCALE_STR, label="Prestation")
+                tlx_effort = gr.Radio(choices=NASA_SCALE_STR, label="Anstrangning")
+                tlx_frustration = gr.Radio(choices=NASA_SCALE_STR, label="Frustration")
+        with gr.Row():
+            save_btn = gr.Button("Spara utvardering & ladda ner", variant="primary", scale=2)
+            clear_btn = gr.Button("Rensa all data fran granssnittet", variant="secondary", scale=1)
+        eval_status = gr.Textbox(label="", interactive=False,
+                                 placeholder="Status visas har efter sparning...")
+        download_file = gr.File(
+            label="Komplett resultat + utvardering (JSON) - klicka for att ladda ner",
+            interactive=False,
+        )
+    # Event handlers
+    process_btn.click(
+        fn=run_pipeline,
+        inputs=[audio_input, text_input],
+        outputs=[transcription_out, anonymized_out, zero_out, few_out, cot_out],
     )
+    def on_save(c, h, s, cl, cm, m, p, t, pe, e, f,
+                transcription, zero, few, cot):
+        """Combine pipeline results + evaluation into ONE downloadable file.
+
+        Args:
+            c, h, s, cl: Strategy chosen for the four Part 1 questions (most
+                complete, least hallucination, best structure, clinical choice).
+            cm: Free-text comment; stored as "" when empty.
+            m, p, t, pe, e, f: NASA-TLX ratings as the strings "1".."7", or a
+                falsy value when unanswered.
+            transcription, zero, few, cot: Current contents of the
+                transcription box and the three VIPS text boxes.
+
+        Returns:
+            ``(status message, path to the JSON file)``, or
+            ``(message, None)`` when no Part 1 question was answered.
+        """
+        # Local import: tempfile is not among this file's top-level imports.
+        import tempfile
+
+        if not any([c, h, s, cl]):
+            return "Fyll i minst ett svar i Del 1.", None
+        filled = [int(x) for x in [m, p, t, pe, e, f] if x]
+        # One clock reading, shared by the entry and the file name.
+        now = datetime.datetime.now()
+        # NOTE(review): `transcription` is wired to the un-anonymized
+        # transcription box, so the export contains the raw text — confirm
+        # that this is intended.
+        entry = {
+            "timestamp": now.isoformat(),
+            "system": f"{Config.APP_NAME} v{Config.APP_VERSION}",
+            "pipeline_results": {
+                "transcription": transcription,
+                "vips": {
+                    "zero_shot":        zero,
+                    "few_shot":         few,
+                    "chain_of_thought": cot,
+                },
+            },
+            "prompt_evaluation": {
+                "most_complete":       c,
+                "least_hallucination": h,
+                "best_structure":      s,
+                "clinical_choice":     cl,
+                "comment":             cm or "",
+            },
+            "nasa_tlx": {
+                "mental":       m,
+                "physical":     p,
+                "temporal":     t,
+                "performance":  pe,
+                "effort":       e,
+                "frustration":  f,
+                "total_avg":    round(sum(filled)/len(filled), 2) if filled else None,
+            },
+        }
+        # Best-effort server-side copy; a failure must not block the download.
+        # NOTE(review): save_evaluation is not among the names imported at the
+        # top of this file; if it is not defined elsewhere, this call raises
+        # NameError and only the warning below is logged — confirm.
+        try:
+            save_evaluation(entry)
+        except Exception as ex:
+            logger.warning("Server save failed: %s", ex)
+        # A unique name avoids two saves in the same second overwriting each
+        # other's export, which a timestamp-only file name allowed.
+        with tempfile.NamedTemporaryFile(
+            mode="w",
+            encoding="utf-8",
+            dir="/tmp",
+            prefix=f"voicenote_utvardering_{now.strftime('%Y%m%d_%H%M%S')}_",
+            suffix=".json",
+            delete=False,
+        ) as fh:
+            json.dump(entry, fh, ensure_ascii=False, indent=2)
+        return "Utvardering sparad! Fil klar for nedladdning nedan.", fh.name
     save_btn.click(
+        fn=on_save,
+        inputs=[eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment,
+                tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration,
+                transcription_out, zero_out, few_out, cot_out],
+        outputs=[eval_status, download_file],
     )
+    def clear_all():
+        """Return reset values for every component wired to the clear button.
+
+        The tuple order must match the ``outputs`` list of ``clear_btn.click``.
+        Only interface fields are reset; nothing else (for example files
+        already written by ``on_save``) is touched here.
+        """
+        return (
+            None, "",                              # audio input, text input
+            "", "", "", "", "",                    # transcription, anonymized, zero/few/CoT
+            None, None, None, None, "",            # four Part 1 radios, comment
+            None, None, None, None, None, None,    # six NASA-TLX radios
+            "All data rensad fran granssnittet.",  # status message
+            None,                                  # download file
+        )
     clear_btn.click(
+        fn=clear_all,
+        inputs=[],
+        outputs=[
+            audio_input, text_input,
+            transcription_out, anonymized_out, zero_out, few_out, cot_out,
+            eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment,
+            tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration,
+            eval_status, download_file,
+        ],
     )
 if __name__ == "__main__":
+    # NOTE(review): custom_css is passed to launch() rather than to gr.Blocks();
+    # confirm that the pinned Gradio version's launch() accepts a css argument.
+    demo.launch(css=custom_css)