Spaces:

SsebaA
/

x

Running on Zero

App Files Files Community

x / app.py

SsebaA

Update app.py

6ca1379 verified 12 days ago

raw

history blame contribute delete

14.2 kB

	"""
	app.py - VoiceNote AI v2.1
	Graceful DeepL fallback: when DeepL quota exhausted or fails,
	Swedish text is sent directly to Scaleway LLM instead.
	"""

	import json
	import logging
	import datetime
	import spaces
	import gradio as gr

	from config import Config
	from gdpr_filter import apply_gdpr_filter
	from models import WhisperASR, DeepLTranslator, MistralClient
	from vips_classifier import classify_all
	from utils import calculate_wer, format_vips_output, save_evaluation

	# Module-level logger named after this module.
	logger = logging.getLogger(__name__)

	# The ASR wrapper is constructed once, at import time. The DeepL and Mistral
	# clients start out as None and are created on first use by _get_clients().
	asr_model = WhisperASR()
	deepl_client = None
	mistral_client = None

	def _get_clients():
	    """Return the shared ``(deepl_client, mistral_client)`` pair, creating them lazily.

	    A DeepL construction failure is logged and leaves the DeepL slot as
	    ``None`` (so construction is attempted again on the next call); callers
	    treat ``None`` as "translation unavailable". A Mistral construction
	    failure is not caught here and propagates to the caller.
	    """
	    global deepl_client, mistral_client

	    if deepl_client is None:
	        try:
	            translator = DeepLTranslator()
	        except Exception as e:
	            # Best effort: the global stays None and the pipeline falls back.
	            logger.warning(f"DeepL client init failed: {e}")
	        else:
	            deepl_client = translator

	    if mistral_client is None:
	        mistral_client = MistralClient()

	    return (deepl_client, mistral_client)


	def _make_json(transcription, wer, zero, few, cot):
	timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
	filename = f"/tmp/voicenote_{timestamp}.json"
	data = {
	"timestamp": datetime.datetime.now().isoformat(),
	"system": "VoiceNote AI v2.1",
	"transcription": transcription,
	"wer": wer,
	"vips_results": {"zero_shot": zero, "few_shot": few, "chain_of_thought": cot}
	}
	with open(filename, "w", encoding="utf-8") as f:
	json.dump(data, f, ensure_ascii=False, indent=2)
	return filename


	@spaces.GPU
	def run_pipeline_audio(audio, reference_text):
	    """Transcribe ``audio`` and pass the transcript to the shared pipeline.

	    Returns the 7-tuple produced by ``_run_common``. If transcription raises,
	    or yields an empty / whitespace-only result, the first element carries a
	    message and the remaining six elements are empty strings.
	    """
	    blanks = ("",) * 6
	    try:
	        swedish_text = asr_model.transcribe(audio)
	        # Evaluated inside the try so a non-string result is reported as an
	        # ASR error as well.
	        nothing_heard = not (swedish_text and swedish_text.strip())
	    except Exception as e:
	        logger.exception("ASR failed")
	        return (f"[FEL ASR]: {e}", *blanks)

	    if nothing_heard:
	        return ("Transkriptionen ar tom.", *blanks)
	    return _run_common(swedish_text, reference_text)


	def run_pipeline_text(text_input, reference_text):
	    """Run the shared pipeline on pasted text instead of audio.

	    The text is stripped before being handed to ``_run_common``. When it is
	    missing or whitespace-only, a 7-tuple with a message in the first slot
	    and six empty strings is returned instead.
	    """
	    cleaned = text_input.strip() if text_input else ""
	    if cleaned:
	        return _run_common(cleaned, reference_text)
	    return ("Ingen text angiven.",) + ("",) * 6


	def _run_common(swedish_text, reference_text):
	    """Run GDPR filtering, translation (with fallback), WER and classification.

	    Args:
	        swedish_text: Source text (transcribed or pasted).
	        reference_text: Optional reference used for the WER figure; ignored
	            when empty or whitespace-only.

	    Returns:
	        A 7-tuple ``(swedish_text, anonymized text, translation-or-fallback
	        display text, WER display string, zero-shot, few-shot,
	        chain-of-thought)``. If client creation fails, the third element is
	        an error message and the rest are empty; if classification fails,
	        the last three elements all carry the same error message.
	    """
	    logger.info("Running GDPR filter...")
	    anonymized_sv = apply_gdpr_filter(swedish_text)

	    try:
	        translator, llm_client = _get_clients()
	    except Exception as e:
	        logger.exception("Client init failed")
	        return (swedish_text, anonymized_sv, f"[FEL]: {e}", "", "", "", "")

	    # Translation step. The filtered Swedish text is the default LLM input and
	    # is only replaced when DeepL actually delivers a translation.
	    logger.info("Running DeepL (with fallback)...")
	    text_for_llm = anonymized_sv
	    if translator is not None:
	        try:
	            english_text = translator.translate(anonymized_sv)
	        except Exception as e:
	            logger.warning(f"DeepL failed ({e}) - falling back to Swedish")
	            english_text_display = f"[DeepL FALLBACK: {str(e)[:80]}]\n\n[Skickar svensk text direkt till LLM:]\n\n{anonymized_sv}"
	        else:
	            english_text_display = english_text
	            text_for_llm = english_text
	            logger.info("DeepL translation OK")
	    else:
	        # The DeepL client could not be created at all.
	        logger.warning("DeepL unavailable - using Swedish text for LLM")
	        english_text_display = "[DeepL ej tillganglig - skickar svensk text direkt till LLM]\n\n" + anonymized_sv

	    # WER is measured on the unfiltered source text, and only on request.
	    reference = reference_text.strip() if reference_text else ""
	    wer_display = ""
	    if reference:
	        wer = calculate_wer(reference, swedish_text)
	        wer_display = f"WER: {wer:.1f}%"

	    # Classification; the LLM input is either the English translation or the
	    # filtered Swedish text.
	    logger.info("Running Scaleway LLM...")
	    try:
	        all_results = classify_all(text_for_llm, llm_client)
	        logger.info("Scaleway classification complete")
	    except Exception as e:
	        logger.exception("LLM failed")
	        err = f"[FEL LLM]: {e}"
	        return (swedish_text, anonymized_sv, english_text_display, wer_display, err, err, err)

	    zero_text, few_text, cot_text = (
	        format_vips_output(all_results[strategy])
	        for strategy in ("zero_shot", "few_shot", "chain_of_thought")
	    )

	    logger.info("Returning results to UI")
	    return (
	        swedish_text,
	        anonymized_sv,
	        english_text_display,
	        wer_display,
	        zero_text,
	        few_text,
	        cot_text,
	    )


	def run_pipeline(audio, text_input, reference_text):
	    """Dispatch to the audio or the text pipeline.

	    Audio takes precedence: ``text_input`` is only used when ``audio`` is
	    ``None``.
	    """
	    if audio is None:
	        return run_pipeline_text(text_input, reference_text)
	    return run_pipeline_audio(audio, reference_text)


	# Labels offered by the prompt-strategy comparison radio groups.
	PROMPT_CHOICES = [
	    "Zero-shot",
	    "Few-shot",
	    "Chain-of-Thought",
	]
	# NASA-TLX answer options "1" through "7", as strings for the radio groups.
	NASA_SCALE_STR = [str(level) for level in range(1, 8)]

	# Stylesheet handed to demo.launch(css=...). It defines the classes used via
	# elem_classes in the layout (header-banner, section-card, section-label,
	# vips-col-zero / vips-col-few / vips-col-cot) and hides the page footer.
	custom_css = """
	@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@300;400;500;600&display=swap');
	* { font-family: 'DM Sans', sans-serif !important; }
	.gradio-container { background: #f0f4f8 !important; max-width: 1400px !important; margin: 0 auto; }
	.header-banner {
	background: linear-gradient(135deg, #1a5276 0%, #2980b9 100%);
	border-radius: 16px; padding: 32px 40px; margin-bottom: 8px;
	}
	.header-banner h1 { color: white !important; font-size: 2rem !important; font-weight: 600 !important; margin: 0 0 6px 0 !important; }
	.header-banner p { color: rgba(255,255,255,0.85) !important; font-size: 0.9rem !important; margin: 0 !important; }
	.section-card { background: white; border-radius: 14px; padding: 28px; margin-bottom: 16px; border: 1px solid #e8ecf0; }
	.section-label {
	font-size: 0.7rem !important; font-weight: 600 !important;
	letter-spacing: 0.12em !important; text-transform: uppercase !important;
	color: #2980b9 !important; margin-bottom: 16px !important;
	}
	.vips-col-zero { border-top: 3px solid #e74c3c !important; border-radius: 10px; padding: 16px; }
	.vips-col-few { border-top: 3px solid #2980b9 !important; border-radius: 10px; padding: 16px; }
	.vips-col-cot { border-top: 3px solid #27ae60 !important; border-radius: 10px; padding: 16px; }
	.gr-button-primary {
	background: linear-gradient(135deg, #1a5276, #2980b9) !important;
	border: none !important; border-radius: 10px !important; font-weight: 600 !important;
	}
	footer, .footer, .gradio-container > footer,
	a[href="gradio.app"], a[href="/?view=api"] {
	display: none !important;
	visibility: hidden !important;
	}
	"""


	# Build the UI. The component handles created here are wired to the event
	# handlers further down in this block.
	with gr.Blocks(title="VoiceNote AI") as demo:

	# Header banner, styled by the .header-banner CSS class.
	gr.HTML("""
	<div class="header-banner">
	<h1>VoiceNote AI</h1>
	<p>VIPS-journalgenerering \| Whisper KBLab -> GDPR -> DeepL (fallback: SV) -> Scaleway</p>
	</div>
	""")

	# Input section: recorded or uploaded audio, or pasted text, plus an optional
	# reference text for the WER figure and the button that starts the pipeline.
	with gr.Group(elem_classes="section-card"):
	gr.Markdown("##### INMATNING", elem_classes="section-label")
	with gr.Row(equal_height=True):
	audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath",
	label="Ljud", scale=1)
	text_input = gr.Textbox(label="Eller text", lines=5, scale=1,
	placeholder="Klistra in patientsamtalet har...")
	with gr.Row():
	reference_input = gr.Textbox(label="Referenstext for WER (valfritt)",
	lines=2, scale=3)
	process_btn = gr.Button("Generera journalanteckning",
	variant="primary", size="lg", scale=1)

	# Results section: WER, collapsible intermediate pipeline texts, and one
	# column per prompt strategy.
	with gr.Group(elem_classes="section-card"):
	gr.Markdown("##### RESULTAT", elem_classes="section-label")

	wer_out = gr.Textbox(label="Word Error Rate", interactive=False)

	with gr.Accordion("Pipeline-detaljer", open=False):
	with gr.Row():
	transcription_out = gr.Textbox(label="Transkription (SV)",
	lines=5, interactive=True)
	anonymized_out = gr.Textbox(label="Anonymiserad (SV)",
	lines=5, interactive=False)
	translated_out = gr.Textbox(label="Oversatt (EN) eller fallback",
	lines=5, interactive=False)

	gr.Markdown("##### VIPS - TRE PROMPTSTRATEGIER", elem_classes="section-label")
	with gr.Row():
	with gr.Column(elem_classes="vips-col-zero"):
	gr.HTML("<h4>Zero-shot</h4>")
	zero_out = gr.Textbox(label="", lines=10, interactive=True)
	with gr.Column(elem_classes="vips-col-few"):
	gr.HTML("<h4>Few-shot</h4>")
	few_out = gr.Textbox(label="", lines=10, interactive=True)
	with gr.Column(elem_classes="vips-col-cot"):
	gr.HTML("<h4>Chain-of-Thought</h4>")
	cot_out = gr.Textbox(label="", lines=10, interactive=True)

	# Evaluation section: part 1 compares the prompt strategies, part 2 is a
	# NASA-TLX questionnaire on a 1-7 scale.
	with gr.Group(elem_classes="section-card"):
	gr.Markdown("##### UTVARDERING", elem_classes="section-label")
	gr.Markdown("Del 1 - Jamforelse av promptstrategier")
	with gr.Row():
	with gr.Column():
	eval_complete = gr.Radio(choices=PROMPT_CHOICES,
	label="1. Mest fullstandig?")
	eval_hallucination = gr.Radio(choices=PROMPT_CHOICES,
	label="2. Undvek bast att hitta pa information?")
	with gr.Column():
	eval_structure = gr.Radio(choices=PROMPT_CHOICES,
	label="3. Foljde VIPS-strukturen bast?")
	eval_clinical = gr.Radio(choices=PROMPT_CHOICES,
	label="4. Skulle valjas i klinisk praktik?")
	eval_comment = gr.Textbox(label="5. Kommentar", lines=3)

	gr.Markdown("---\nDel 2 - NASA-TLX \| 1 = lag, 7 = hog")
	with gr.Row():
	with gr.Column():
	tlx_mental = gr.Radio(choices=NASA_SCALE_STR, label="Mental")
	tlx_physical = gr.Radio(choices=NASA_SCALE_STR, label="Fysisk")
	tlx_temporal = gr.Radio(choices=NASA_SCALE_STR, label="Tidsbrist")
	with gr.Column():
	tlx_performance = gr.Radio(choices=NASA_SCALE_STR, label="Prestation")
	tlx_effort = gr.Radio(choices=NASA_SCALE_STR, label="Anstrangning")
	tlx_frustration = gr.Radio(choices=NASA_SCALE_STR, label="Frustration")

	with gr.Row():
	save_btn = gr.Button("Spara utvardering & ladda ner", variant="primary", scale=2)
	clear_btn = gr.Button("Rensa all data fran granssnittet", variant="secondary", scale=1)

	eval_status = gr.Textbox(label="", interactive=False,
	placeholder="Status visas har efter sparning...")

	download_file = gr.File(
	label="Komplett resultat + utvardering (JSON) - klicka for att ladda ner",
	interactive=False,
	)

	# Event handlers
	# The outputs are listed in the same order as the 7-tuple the pipeline
	# returns: source text, anonymized text, translation/fallback text, WER,
	# then the zero-shot, few-shot and chain-of-thought results.
	process_btn.click(
	fn=run_pipeline,
	inputs=[audio_input, text_input, reference_input],
	outputs=[transcription_out, anonymized_out, translated_out, wer_out,
	zero_out, few_out, cot_out],
	)

	def on_save(c, h, s, cl, cm, m, p, t, pe, e, f,
	            transcription, wer, zero, few, cot):
	    """Combine pipeline results and the evaluation form into one downloadable file.

	    Args:
	        c, h, s, cl: Chosen prompt strategy for "most complete", "least
	            hallucination", "best structure" and "clinical choice".
	        cm: Free-text comment; stored as ``""`` when empty.
	        m, p, t, pe, e, f: NASA-TLX answers (mental, physical, temporal,
	            performance, effort, frustration) as digit strings, or a falsy
	            value when unanswered.
	        transcription, wer, zero, few, cot: Current contents of the result
	            fields, stored under ``"pipeline_results"``.

	    Returns:
	        ``(status message, path of the JSON file)``. The path is ``None``
	        when none of the four part-1 questions has been answered.
	    """
	    import os
	    import tempfile

	    if not any([c, h, s, cl]):
	        return "Fyll i minst ett svar i Del 1.", None

	    # Only answered TLX items contribute to the average.
	    filled = [int(x) for x in (m, p, t, pe, e, f) if x]

	    # One clock reading feeds both the entry and the file name, so the two
	    # can never disagree.
	    now = datetime.datetime.now()

	    # NOTE(review): "transcription" is whatever the transcription field holds,
	    # not the anonymized text - confirm that persisting it is intended.
	    entry = {
	        "timestamp": now.isoformat(),
	        "system": "VoiceNote AI v2.1",

	        "pipeline_results": {
	            "transcription": transcription,
	            "wer": wer,
	            "vips": {
	                "zero_shot": zero,
	                "few_shot": few,
	                "chain_of_thought": cot,
	            },
	        },

	        "prompt_evaluation": {
	            "most_complete": c,
	            "least_hallucination": h,
	            "best_structure": s,
	            "clinical_choice": cl,
	            "comment": cm or "",
	        },

	        "nasa_tlx": {
	            "mental": m,
	            "physical": p,
	            "temporal": t,
	            "performance": pe,
	            "effort": e,
	            "frustration": f,
	            "total_avg": round(sum(filled) / len(filled), 2) if filled else None,
	        },
	    }

	    # Server-side persistence is best effort; the download is produced even
	    # when it fails.
	    try:
	        save_evaluation(entry)
	    except Exception as ex:
	        logger.warning(f"Server save failed: {ex}")

	    # mkstemp guarantees a fresh file: a name with only second resolution
	    # would let two saves in the same second overwrite each other's download.
	    fd, filename = tempfile.mkstemp(
	        prefix=f"voicenote_utvardering_{now.strftime('%Y%m%d_%H%M%S')}_",
	        suffix=".json",
	        dir="/tmp",
	    )
	    with os.fdopen(fd, "w", encoding="utf-8") as fh:
	        json.dump(entry, fh, ensure_ascii=False, indent=2)

	    return "Utvardering sparad! Fil klar for nedladdning nedan.", filename

	# The inputs are passed positionally, so their order must match on_save's
	# parameters: four strategy choices, the comment, six NASA-TLX answers,
	# then the transcription, WER and the three strategy result fields.
	save_btn.click(
	fn=on_save,
	inputs=[eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment,
	tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration,
	transcription_out, wer_out, zero_out, few_out, cot_out],
	outputs=[eval_status, download_file],
	)

	def clear_all():
	    """Return reset values for the 23 components wired to the clear button.

	    Order: audio, text and reference inputs; the seven result text fields;
	    the four strategy choices and the comment; the six NASA-TLX answers; the
	    status message; the download file.
	    """
	    input_reset = (None, "", "")
	    result_reset = ("",) * 7
	    evaluation_reset = (None,) * 4 + ("",)
	    tlx_reset = (None,) * 6
	    return (
	        *input_reset,
	        *result_reset,
	        *evaluation_reset,
	        *tlx_reset,
	        "All data rensad fran granssnittet.",
	        None,
	    )

	# The 23 outputs must stay in the same order as the tuple returned by
	# clear_all(): inputs, result fields, evaluation answers, NASA-TLX answers,
	# then the status box and the download file.
	clear_btn.click(
	fn=clear_all,
	inputs=[],
	outputs=[
	audio_input, text_input, reference_input,
	transcription_out, anonymized_out, translated_out, wer_out,
	zero_out, few_out, cot_out,
	eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment,
	tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration,
	eval_status, download_file,
	],
	)


	# Start the app with the custom stylesheet when this file is run as a script.
	if __name__ == "__main__":
	demo.launch(css=custom_css)