x / app.py
SsebaA's picture
Update app.py
6ca1379 verified
"""
app.py - VoiceNote AI v2.1
Graceful DeepL fallback: when the DeepL quota is exhausted or the translation
fails, the Swedish text is sent directly to the Scaleway LLM instead.
"""
import json
import logging
import datetime
import spaces
import gradio as gr
from config import Config
from gdpr_filter import apply_gdpr_filter
from models import WhisperASR, DeepLTranslator, MistralClient
from vips_classifier import classify_all
from utils import calculate_wer, format_vips_output, save_evaluation
# Logger named after this module.
logger = logging.getLogger(__name__)

# The ASR wrapper is constructed once, at import time, and reused by
# run_pipeline_audio for every request.
asr_model = WhisperASR()

# Client slots start out empty; _get_clients() fills them on first use.
deepl_client = mistral_client = None
def _get_clients():
    """Return the cached ``(deepl_client, mistral_client)`` pair, creating them lazily.

    A failing DeepL constructor is logged and leaves the DeepL slot as ``None``
    (it is retried on the next call). A failing Mistral constructor is not
    caught here and propagates to the caller.
    """
    global deepl_client, mistral_client

    if deepl_client is None:
        try:
            translator = DeepLTranslator()
        except Exception as init_error:
            # Slot is already None, so there is nothing to reset.
            logger.warning(f"DeepL client init failed: {init_error}")
        else:
            deepl_client = translator

    mistral_client = MistralClient() if mistral_client is None else mistral_client
    return (deepl_client, mistral_client)
def _make_json(transcription, wer, zero, few, cot):
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"/tmp/voicenote_{timestamp}.json"
data = {
"timestamp": datetime.datetime.now().isoformat(),
"system": "VoiceNote AI v2.1",
"transcription": transcription,
"wer": wer,
"vips_results": {"zero_shot": zero, "few_shot": few, "chain_of_thought": cot}
}
with open(filename, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
return filename
@spaces.GPU
def run_pipeline_audio(audio, reference_text):
    """Transcribe ``audio`` and pass the transcript to the shared pipeline.

    Returns the 7-tuple produced by ``_run_common``. If transcription raises or
    yields only whitespace, returns a 7-tuple whose first item is a message and
    whose remaining items are empty strings.
    """
    blank_tail = ("",) * 6

    try:
        transcript = asr_model.transcribe(audio)
        # Evaluated inside the try so a non-string result is reported as an ASR error.
        is_empty = not (transcript and transcript.strip())
    except Exception as asr_error:
        logger.exception("ASR failed")
        return (f"[FEL ASR]: {asr_error}",) + blank_tail

    if is_empty:
        return ("Transkriptionen ar tom.",) + blank_tail
    return _run_common(transcript, reference_text)
def run_pipeline_text(text_input, reference_text):
    """Run the shared pipeline on typed/pasted text.

    Missing or whitespace-only input short-circuits with a 7-tuple holding a
    message followed by six empty strings; otherwise the stripped text is
    forwarded to ``_run_common``.
    """
    cleaned = text_input.strip() if text_input else ""
    if not cleaned:
        return ("Ingen text angiven.",) + ("",) * 6
    return _run_common(cleaned, reference_text)
def _translate_with_fallback(translator, anonymized_sv):
    """Translate the anonymized Swedish text, falling back to the Swedish text.

    Args:
        translator: Object with a ``translate(text)`` method, or ``None`` when
            no DeepL client could be created.
        anonymized_sv: Anonymized Swedish text.

    Returns:
        ``(display_text, text_for_llm)``. On success both are the translation.
        On fallback ``display_text`` is a notice followed by the Swedish text
        and ``text_for_llm`` is the Swedish text itself.
    """
    if translator is None:
        # DeepL client never initialized
        logger.warning("DeepL unavailable - using Swedish text for LLM")
        notice = "[DeepL ej tillganglig - skickar svensk text direkt till LLM]\n\n" + anonymized_sv
        return notice, anonymized_sv

    try:
        english_text = translator.translate(anonymized_sv)
    except Exception as e:
        logger.warning("DeepL failed (%s) - falling back to Swedish", e)
        # Only the first 80 characters of the error are shown in the UI.
        notice = (
            f"[DeepL FALLBACK: {str(e)[:80]}]\n\n"
            f"[Skickar svensk text direkt till LLM:]\n\n{anonymized_sv}"
        )
        return notice, anonymized_sv

    logger.info("DeepL translation OK")
    return english_text, english_text


def _run_common(swedish_text, reference_text):
    """Run anonymization, translation, optional WER and LLM classification.

    Args:
        swedish_text: Swedish transcript or typed text.
        reference_text: Optional reference transcript; when non-blank a WER
            figure is computed against ``swedish_text``.

    Returns:
        A 7-tuple ``(swedish_text, anonymized_sv, translated_or_fallback,
        wer_display, zero_shot, few_shot, chain_of_thought)``. Failures after
        anonymization are reported as ``[FEL ...]`` strings in the affected
        fields rather than raised.

    Raises:
        Whatever ``apply_gdpr_filter`` raises; that step is not guarded.
    """
    logger.info("Running GDPR filter...")
    # Not wrapped in try/except: if the filter raises, the exception propagates
    # and nothing is passed on to the translation or LLM steps.
    anonymized_sv = apply_gdpr_filter(swedish_text)

    # Get clients
    try:
        dl, mc = _get_clients()
    except Exception as e:
        logger.exception("Client init failed")
        return (swedish_text, anonymized_sv, f"[FEL]: {e}", "", "", "", "")

    # Try DeepL with graceful fallback to Swedish
    logger.info("Running DeepL (with fallback)...")
    english_text_display, text_for_llm = _translate_with_fallback(dl, anonymized_sv)

    # Calculate WER if reference provided. WER is an optional metric, so a
    # failure here is shown in the WER field instead of aborting the request.
    wer_display = ""
    if reference_text and reference_text.strip():
        try:
            wer = calculate_wer(reference_text.strip(), swedish_text)
            wer_display = f"WER: {wer:.1f}%"
        except Exception as e:
            logger.exception("WER calculation failed")
            wer_display = f"[FEL WER]: {e}"

    # Send to Scaleway LLM (text_for_llm is either English or Swedish)
    logger.info("Running Scaleway LLM...")
    try:
        all_results = classify_all(text_for_llm, mc)
        logger.info("Scaleway classification complete")
        # Formatting stays inside the guard: a missing strategy key or a
        # malformed result is reported the same way as a failed LLM call.
        zero_text = format_vips_output(all_results["zero_shot"])
        few_text = format_vips_output(all_results["few_shot"])
        cot_text = format_vips_output(all_results["chain_of_thought"])
    except Exception as e:
        logger.exception("LLM failed")
        err = f"[FEL LLM]: {e}"
        return (swedish_text, anonymized_sv, english_text_display, wer_display, err, err, err)

    logger.info("Returning results to UI")
    return (swedish_text, anonymized_sv, english_text_display, wer_display,
            zero_text, few_text, cot_text)
def run_pipeline(audio, text_input, reference_text):
    """Dispatch to the audio or the text pipeline.

    Audio takes precedence: the text box is only used when ``audio`` is ``None``.
    """
    if audio is None:
        return run_pipeline_text(text_input, reference_text)
    return run_pipeline_audio(audio, reference_text)
# Options for the "which prompt strategy was best" radio groups.
PROMPT_CHOICES = [
    "Zero-shot",
    "Few-shot",
    "Chain-of-Thought",
]

# NASA-TLX answer scale 1..7, kept as strings for the radio widgets.
NASA_SCALE_STR = [str(score) for score in range(1, 8)]

# Stylesheet for the app: font, page background, header banner, section cards,
# the three coloured VIPS columns, the primary button, and hiding the footer.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@300;400;500;600&display=swap');
* { font-family: 'DM Sans', sans-serif !important; }
.gradio-container { background: #f0f4f8 !important; max-width: 1400px !important; margin: 0 auto; }
.header-banner {
background: linear-gradient(135deg, #1a5276 0%, #2980b9 100%);
border-radius: 16px; padding: 32px 40px; margin-bottom: 8px;
}
.header-banner h1 { color: white !important; font-size: 2rem !important; font-weight: 600 !important; margin: 0 0 6px 0 !important; }
.header-banner p { color: rgba(255,255,255,0.85) !important; font-size: 0.9rem !important; margin: 0 !important; }
.section-card { background: white; border-radius: 14px; padding: 28px; margin-bottom: 16px; border: 1px solid #e8ecf0; }
.section-label {
font-size: 0.7rem !important; font-weight: 600 !important;
letter-spacing: 0.12em !important; text-transform: uppercase !important;
color: #2980b9 !important; margin-bottom: 16px !important;
}
.vips-col-zero { border-top: 3px solid #e74c3c !important; border-radius: 10px; padding: 16px; }
.vips-col-few { border-top: 3px solid #2980b9 !important; border-radius: 10px; padding: 16px; }
.vips-col-cot { border-top: 3px solid #27ae60 !important; border-radius: 10px; padding: 16px; }
.gr-button-primary {
background: linear-gradient(135deg, #1a5276, #2980b9) !important;
border: none !important; border-radius: 10px !important; font-weight: 600 !important;
}
footer, .footer, .gradio-container > footer,
a[href*="gradio.app"], a[href*="/?view=api"] {
display: none !important;
visibility: hidden !important;
}
"""
# Builds the whole UI and wires its three buttons.
# NOTE(review): container nesting below is reconstructed from statement order;
# confirm it against the intended layout.
with gr.Blocks(title="VoiceNote AI") as demo:
    # ---- Header banner -------------------------------------------------
    gr.HTML("""
<div class="header-banner">
<h1>VoiceNote AI</h1>
<p>VIPS-journalgenerering | Whisper KBLab -> GDPR -> DeepL (fallback: SV) -> Scaleway</p>
</div>
""")

    # ---- Input: audio or text, optional WER reference, run button -------
    with gr.Group(elem_classes="section-card"):
        gr.Markdown("##### INMATNING", elem_classes="section-label")
        with gr.Row(equal_height=True):
            audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath",
                                   label="Ljud", scale=1)
            text_input = gr.Textbox(label="Eller text", lines=5, scale=1,
                                    placeholder="Klistra in patientsamtalet har...")
        with gr.Row():
            reference_input = gr.Textbox(label="Referenstext for WER (valfritt)",
                                         lines=2, scale=3)
            process_btn = gr.Button("Generera journalanteckning",
                                    variant="primary", size="lg", scale=1)

    # ---- Results: WER, pipeline intermediates, three VIPS outputs -------
    with gr.Group(elem_classes="section-card"):
        gr.Markdown("##### RESULTAT", elem_classes="section-label")
        wer_out = gr.Textbox(label="Word Error Rate", interactive=False)
        with gr.Accordion("Pipeline-detaljer", open=False):
            with gr.Row():
                transcription_out = gr.Textbox(label="Transkription (SV)",
                                               lines=5, interactive=True)
                anonymized_out = gr.Textbox(label="Anonymiserad (SV)",
                                            lines=5, interactive=False)
                translated_out = gr.Textbox(label="Oversatt (EN) eller fallback",
                                            lines=5, interactive=False)
        gr.Markdown("##### VIPS - TRE PROMPTSTRATEGIER", elem_classes="section-label")
        with gr.Row():
            with gr.Column(elem_classes="vips-col-zero"):
                gr.HTML("<h4>Zero-shot</h4>")
                zero_out = gr.Textbox(label="", lines=10, interactive=True)
            with gr.Column(elem_classes="vips-col-few"):
                gr.HTML("<h4>Few-shot</h4>")
                few_out = gr.Textbox(label="", lines=10, interactive=True)
            with gr.Column(elem_classes="vips-col-cot"):
                gr.HTML("<h4>Chain-of-Thought</h4>")
                cot_out = gr.Textbox(label="", lines=10, interactive=True)

    # ---- Evaluation: strategy comparison + NASA-TLX ---------------------
    with gr.Group(elem_classes="section-card"):
        gr.Markdown("##### UTVARDERING", elem_classes="section-label")
        gr.Markdown("**Del 1 - Jamforelse av promptstrategier**")
        with gr.Row():
            with gr.Column():
                eval_complete = gr.Radio(choices=PROMPT_CHOICES,
                                         label="1. Mest fullstandig?")
                eval_hallucination = gr.Radio(choices=PROMPT_CHOICES,
                                              label="2. Undvek bast att hitta pa information?")
            with gr.Column():
                eval_structure = gr.Radio(choices=PROMPT_CHOICES,
                                          label="3. Foljde VIPS-strukturen bast?")
                eval_clinical = gr.Radio(choices=PROMPT_CHOICES,
                                         label="4. Skulle valjas i klinisk praktik?")
        eval_comment = gr.Textbox(label="5. Kommentar", lines=3)
        gr.Markdown("---\n**Del 2 - NASA-TLX** | *1 = lag, 7 = hog*")
        with gr.Row():
            with gr.Column():
                tlx_mental = gr.Radio(choices=NASA_SCALE_STR, label="Mental")
                tlx_physical = gr.Radio(choices=NASA_SCALE_STR, label="Fysisk")
                tlx_temporal = gr.Radio(choices=NASA_SCALE_STR, label="Tidsbrist")
            with gr.Column():
                tlx_performance = gr.Radio(choices=NASA_SCALE_STR, label="Prestation")
                tlx_effort = gr.Radio(choices=NASA_SCALE_STR, label="Anstrangning")
                tlx_frustration = gr.Radio(choices=NASA_SCALE_STR, label="Frustration")
        with gr.Row():
            save_btn = gr.Button("Spara utvardering & ladda ner", variant="primary", scale=2)
            clear_btn = gr.Button("Rensa all data fran granssnittet", variant="secondary", scale=1)
        eval_status = gr.Textbox(label="", interactive=False,
                                 placeholder="Status visas har efter sparning...")
        download_file = gr.File(
            label="Komplett resultat + utvardering (JSON) - klicka for att ladda ner",
            interactive=False,
        )

    # ---- Event handlers --------------------------------------------------
    # Output order must match the 7-tuple returned by run_pipeline.
    process_btn.click(
        fn=run_pipeline,
        inputs=[audio_input, text_input, reference_input],
        outputs=[transcription_out, anonymized_out, translated_out, wer_out,
                 zero_out, few_out, cot_out],
    )

    def on_save(c, h, s, cl, cm, m, p, t, pe, e, f,
                transcription, wer, zero, few, cot):
        """Combine pipeline results and the evaluation into one downloadable file.

        Args:
            c, h, s, cl: Chosen strategy for questions 1-4 (or ``None``).
            cm: Free-text comment.
            m, p, t, pe, e, f: NASA-TLX answers as strings "1".."7" (or ``None``).
            transcription, wer, zero, few, cot: Current contents of the
                corresponding result text boxes.

        Returns:
            ``(status_message, file_path)``; ``file_path`` is ``None`` when none
            of questions 1-4 has been answered.
        """
        import tempfile  # local import: only this handler needs it

        if not any([c, h, s, cl]):
            return "Fyll i minst ett svar i Del 1.", None

        # One clock sample for both the payload and the file name.
        now = datetime.datetime.now()
        # Unanswered TLX items are left out of the average.
        filled = [int(x) for x in [m, p, t, pe, e, f] if x]
        # NOTE(review): `transcription` comes from transcription_out, which
        # run_pipeline fills with the un-anonymized transcript; confirm that
        # persisting it here is intended.
        entry = {
            "timestamp": now.isoformat(),
            "system": "VoiceNote AI v2.1",
            "pipeline_results": {
                "transcription": transcription,
                "wer": wer,
                "vips": {
                    "zero_shot": zero,
                    "few_shot": few,
                    "chain_of_thought": cot,
                },
            },
            "prompt_evaluation": {
                "most_complete": c,
                "least_hallucination": h,
                "best_structure": s,
                "clinical_choice": cl,
                "comment": cm or "",
            },
            "nasa_tlx": {
                "mental": m,
                "physical": p,
                "temporal": t,
                "performance": pe,
                "effort": e,
                "frustration": f,
                "total_avg": round(sum(filled) / len(filled), 2) if filled else None,
            },
        }

        # Server-side persistence is best effort; the download is produced regardless.
        try:
            save_evaluation(entry)
        except Exception as ex:
            logger.warning(f"Server save failed: {ex}")

        # A second-resolution name alone is not unique: two participants saving
        # within the same second would overwrite each other's file.
        # NamedTemporaryFile adds a random component and creates the file exclusively.
        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            delete=False,  # Gradio serves the file after this function returns
            prefix=f"voicenote_utvardering_{now:%Y%m%d_%H%M%S}_",
            suffix=".json",
        ) as fh:
            json.dump(entry, fh, ensure_ascii=False, indent=2)
            filename = fh.name
        return "Utvardering sparad! Fil klar for nedladdning nedan.", filename

    # Input order must match on_save's positional parameters.
    save_btn.click(
        fn=on_save,
        inputs=[eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment,
                tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration,
                transcription_out, wer_out, zero_out, few_out, cot_out],
        outputs=[eval_status, download_file],
    )

    def clear_all():
        """Return reset values for every input, result and evaluation widget.

        Only widget values are reset. Files already written by ``on_save`` and
        anything stored by ``save_evaluation`` are not removed here.
        """
        return (
            # audio, text, reference, then the seven result boxes
            None, "", "", "", "", "", "", "", "", "",
            # four strategy radios + comment
            None, None, None, None, "",
            # six NASA-TLX radios
            None, None, None, None, None, None,
            # status message
            "All data rensad fran granssnittet.",
            # download slot
            None,
        )

    # Output order must match the tuple returned by clear_all (23 values).
    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[
            audio_input, text_input, reference_input,
            transcription_out, anonymized_out, translated_out, wer_out,
            zero_out, few_out, cot_out,
            eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment,
            tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration,
            eval_status, download_file,
        ],
    )
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    # The stylesheet is passed to launch() rather than to gr.Blocks().
    # NOTE(review): this needs a Gradio release whose launch() accepts `css`;
    # confirm against the pinned Gradio version.
    demo.launch(css=custom_css)