SsebaA committed on
Commit
0586e99
·
verified ·
1 Parent(s): 54eb47d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +256 -207
app.py CHANGED
@@ -1,24 +1,22 @@
1
-
2
-
3
  import logging
 
 
4
  import gradio as gr
 
 
 
5
  from models import WhisperASR, MistralClient
6
- from gdpr_filter import GDPRFilter
7
- from vips_classifier import VIPSClassifier
8
- from config import Config
9
 
10
- logging.basicConfig(level=logging.INFO)
11
- logger = logging.getLogger(__name__)
12
 
13
- # Initialize components
14
- whisper_model = WhisperASR()
15
- gdpr_filter = GDPRFilter()
16
- llm_client = MistralClient()
17
- vips_classifier = VIPSClassifier(llm_client)
18
 
 
 
19
 
20
  def format_vips_output(text) -> str:
21
- """Format VIPS output, handling dict or string types."""
22
  if isinstance(text, dict):
23
  text = str(text)
24
 
@@ -28,219 +26,270 @@ def format_vips_output(text) -> str:
28
  return str(text).strip()
29
 
30
 
31
- def run_pipeline_audio(audio_input, reference_text=""):
32
- """Process audio input through full pipeline."""
33
- if audio_input is None:
34
- return "❌ No audio input provided", "", "", "", ""
35
-
36
- logger.info("Processing audio input...")
37
-
38
- # Step 1: ASR
39
- logger.info("Running Whisper ASR...")
40
- transcript = whisper_model.transcribe(audio_input)
41
-
42
- if not transcript:
43
- return " ASR failed - no output", "", "", "", ""
44
-
45
- # Calculate WER if reference provided
46
- wer_result = ""
47
- if reference_text and reference_text.strip():
48
- from models import calculate_wer
49
- wer_value = calculate_wer(reference_text, transcript)
50
- wer_result = f"WER: {wer_value:.2f}%"
51
-
52
- return _run_common(transcript, wer_result)
53
 
54
 
55
  def run_pipeline_text(text_input):
56
- """Process text input through pipeline (skip ASR)."""
57
  if not text_input or not text_input.strip():
58
- return " No text input provided", "", "", "", ""
59
-
60
- logger.info("Processing text input (ASR skipped)...")
61
- return _run_common(text_input.strip(), "ASR: Skipped")
62
 
63
 
64
- def _run_common(text_input, wer_info=""):
65
- """Common pipeline: GDPR → VIPS Classification."""
66
-
67
- # Step 2: GDPR Filter
68
  logger.info("Running GDPR filter...")
69
- anonymized_text = gdpr_filter.apply_dual_layer_gdpr(text_input)
70
-
71
- # Step 3: VIPS Classification (3 strategies)
 
 
 
 
 
 
 
72
  logger.info("Running Scaleway LLM...")
73
- all_results = vips_classifier.classify_vips(anonymized_text)
74
-
75
- # Format outputs
76
- zero_text = format_vips_output(all_results.get("zero_shot", ""))
77
- few_text = format_vips_output(all_results.get("few_shot", ""))
78
- chain_text = format_vips_output(all_results.get("chain_of_thought", ""))
79
-
80
- logger.info("Pipeline complete")
81
-
82
- return (
83
- f"✅ Transcription:\n{text_input}\n\n[{wer_info}]",
84
- zero_text,
85
- few_text,
86
- chain_text,
87
- f"Anonymized: {len(anonymized_text)} chars"
88
- )
89
 
 
 
 
90
 
91
- def on_save_results(zero, few, chain):
92
- """Save results to file."""
93
- import json
94
- from datetime import datetime
95
-
96
- data = {
97
- "timestamp": datetime.now().isoformat(),
98
- "zero_shot": zero,
99
- "few_shot": few,
100
- "chain_of_thought": chain,
101
- }
102
-
103
- filename = f"{Config.APP_NAME}_v{Config.APP_VERSION}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
104
- with open(filename, 'w', encoding='utf-8') as f:
105
- json.dump(data, f, indent=2, ensure_ascii=False)
106
-
107
- return f"✅ Saved to {filename}"
108
 
109
 
110
- # ============================================================================
111
- # GRADIO INTERFACE 2+1 LAYOUT (2 columns on top, 1 full-width below)
112
- # ============================================================================
 
113
 
114
- with gr.Blocks(title=f"{Config.APP_NAME} v{Config.APP_VERSION}", theme=gr.themes.Soft()) as demo:
115
-
116
- # Header
117
- gr.Markdown(f"""
118
- # {Config.APP_NAME}
119
- **Automated VIPS Documentation from Swedish Patient-Nurse Conversations**
120
-
121
- Pipeline: Whisper KBLab GDPR Filter Scaleway LLM (VIPS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  """)
123
-
124
- # ========== INPUTS ==========
125
- with gr.Group():
126
- gr.Markdown("### 📥 Input")
127
-
128
- with gr.Tabs():
129
- # Tab 1: Audio input
130
- with gr.TabItem("🎤 Voice Recording"):
131
- audio_input = gr.Audio(
132
- label="Record or upload audio",
133
- type="filepath",
134
- format="wav"
135
- )
136
- reference_text = gr.Textbox(
137
- label="Reference text (for WER calculation, optional)",
138
- lines=3,
139
- placeholder="Paste correct transcription here..."
140
- )
141
- submit_audio = gr.Button("🚀 Process Audio", variant="primary")
142
-
143
- # Tab 2: Text input
144
- with gr.TabItem("📝 Text Input"):
145
- text_input = gr.Textbox(
146
- label="Or paste text directly (skips ASR)",
147
- lines=5,
148
- placeholder="Paste transcribed conversation here..."
149
- )
150
- submit_text = gr.Button("🚀 Process Text", variant="primary")
151
-
152
- # ========== OUTPUTS ==========
153
- gr.Markdown("### 📤 Output")
154
-
155
- # Transcription box
156
- transcript_box = gr.Textbox(
157
- label="Transcription & Metrics",
158
- lines=4,
159
- interactive=False
160
- )
161
-
162
- # **2-COLUMN LAYOUT FOR ZERO-SHOT AND FEW-SHOT**
163
- with gr.Row():
164
- with gr.Column():
165
- zero_shot_output = gr.Textbox(
166
- label="Zero-shot",
167
- lines=20,
168
- interactive=True,
169
- show_label=True
170
- )
171
-
172
- with gr.Column():
173
- few_shot_output = gr.Textbox(
174
- label="Few-shot",
175
- lines=20,
176
- interactive=True,
177
- show_label=True
178
- )
179
-
180
- # **FULL-WIDTH LAYOUT FOR CHAIN-OF-THOUGHT**
181
- chain_of_thought_output = gr.Textbox(
182
- label="Chain-of-Thought",
183
- lines=20,
184
- interactive=True,
185
- show_label=True
186
- )
187
-
188
- # Info box
189
- info_box = gr.Textbox(
190
- label="Info",
191
- interactive=False
192
- )
193
-
194
- # ========== ACTIONS ==========
195
- with gr.Row():
196
- save_btn = gr.Button("💾 Save Results as JSON", variant="secondary")
197
- clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
198
-
199
- save_status = gr.Textbox(label="Status", interactive=False)
200
-
201
- # ========== EVENT HANDLERS ==========
202
-
203
- # Process audio
204
- submit_audio.click(
205
- fn=run_pipeline_audio,
206
- inputs=[audio_input, reference_text],
207
- outputs=[transcript_box, zero_shot_output, few_shot_output, chain_of_thought_output, info_box]
208
- )
209
-
210
- # Process text
211
- submit_text.click(
212
- fn=run_pipeline_text,
213
- inputs=[text_input],
214
- outputs=[transcript_box, zero_shot_output, few_shot_output, chain_of_thought_output, info_box]
215
  )
216
-
217
- # Save results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  save_btn.click(
219
- fn=on_save_results,
220
- inputs=[zero_shot_output, few_shot_output, chain_of_thought_output],
221
- outputs=[save_status]
 
 
222
  )
223
-
224
- # Clear all
 
 
 
 
 
 
 
 
 
 
225
  clear_btn.click(
226
- fn=lambda: ("", "", "", "", "", ""),
227
- outputs=[audio_input, text_input, transcript_box, zero_shot_output,
228
- few_shot_output, chain_of_thought_output]
 
 
 
 
 
 
229
  )
230
-
231
- # Footer
232
- gr.Markdown("""
233
- ---
234
- **⚠️ Disclaimer:** This system generates nursing documentation drafts only.
235
- **Always review and approve** AI-generated notes before clinical use.
236
- **Never rely on system output** for medical decision-making.
237
- """)
238
 
239
 
240
  if __name__ == "__main__":
241
- demo.launch(
242
- share=False,
243
- server_name="0.0.0.0",
244
- server_port=7860,
245
- show_error=True
246
- )
 
1
+ import json
 
2
  import logging
3
+ import datetime
4
+ import spaces
5
  import gradio as gr
6
+
7
+ from config import Config, VIPS_CATEGORIES
8
+ from gdpr_filter import apply_gdpr_filter
9
  from models import WhisperASR, MistralClient
10
+ from vips_classifier import classify_all
11
+
 
12
 
 
 
13
 
14
+ logger = logging.getLogger(__name__)
 
 
 
 
15
 
16
+ asr_model = WhisperASR()
17
+ mistral_client = None
18
 
19
  def format_vips_output(text) -> str:
 
20
  if isinstance(text, dict):
21
  text = str(text)
22
 
 
26
  return str(text).strip()
27
 
28
 
29
def _get_clients():
    """Return the shared ``MistralClient``, constructing it on first use.

    The instance is kept in the module-level ``mistral_client`` variable so
    that later calls reuse it instead of building a new client.
    """
    global mistral_client
    if mistral_client is not None:
        return mistral_client
    mistral_client = MistralClient()
    return mistral_client
34
+
35
+
36
@spaces.GPU
def run_pipeline_audio(audio):
    """Transcribe ``audio`` and feed the text into the shared pipeline.

    Returns a 5-tuple ``(transcription, anonymized, zero_shot, few_shot,
    chain_of_thought)``, one value per output component wired to the
    process button. When transcription fails or yields no text, the message
    is placed in the first slot and the remaining slots are empty strings.
    """
    try:
        swedish_text = asr_model.transcribe(audio)
        if not swedish_text or not swedish_text.strip():
            # Five values, not six: must match the success path of
            # _run_common and the five UI outputs.
            return ("Transkriptionen ar tom.", "", "", "", "")
    except Exception as e:
        # UI boundary: log the traceback and surface the error in the UI
        # instead of letting the handler crash.
        logger.exception("ASR failed")
        return (f"[FEL ASR]: {e}", "", "", "", "")
    return _run_common(swedish_text)
 
 
 
 
 
46
 
47
 
48
def run_pipeline_text(text_input):
    """Run the pipeline on pasted text, skipping speech recognition.

    Returns a 5-tuple ``(transcription, anonymized, zero_shot, few_shot,
    chain_of_thought)``. For missing or whitespace-only input the first slot
    carries the user-facing message and the other four are empty strings.
    """
    cleaned = text_input.strip() if text_input else ""
    if not cleaned:
        # Five values so the arity matches the five output components
        # (the previous six-value tuple did not).
        return ("Ingen text angiven.", "", "", "", "")
    return _run_common(cleaned)
 
 
52
 
53
 
54
def _run_common(swedish_text):
    """Anonymize ``swedish_text`` and classify it with the three prompt strategies.

    Steps: run ``apply_gdpr_filter`` on the text, obtain the LLM client via
    ``_get_clients()``, then call ``classify_all`` on the anonymized text.

    Returns a 5-tuple ``(transcription, anonymized, zero_shot, few_shot,
    chain_of_thought)`` on every path, so the result always lines up with
    the five output components. If client creation fails the error text is
    put in the zero-shot slot; if classification fails the same error text
    is put in all three strategy slots.
    """
    logger.info("Running GDPR filter...")
    anonymized_sv = apply_gdpr_filter(swedish_text)

    # Get clients
    try:
        mc = _get_clients()
    except Exception as e:
        logger.exception("Client init failed")
        return (swedish_text, anonymized_sv, f"[FEL]: {e}", "", "")

    # Send to Scaleway LLM
    logger.info("Running Scaleway LLM...")
    try:
        all_results = classify_all(anonymized_sv, mc)
        logger.info("Scaleway classification complete")
    except Exception as e:
        logger.exception("LLM failed")
        err = f"[FEL LLM]: {e}"
        # One error string per strategy column (three, not four).
        return (swedish_text, anonymized_sv, err, err, err)

    zero_text = format_vips_output(all_results["zero_shot"])
    few_text = format_vips_output(all_results["few_shot"])
    cot_text = format_vips_output(all_results["chain_of_thought"])

    logger.info("Returning results to UI")
    return (swedish_text, anonymized_sv, zero_text, few_text, cot_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
 
83
def run_pipeline(audio, text_input):
    """Dispatch to the audio pipeline when audio is given, otherwise to the text pipeline."""
    if audio is None:
        return run_pipeline_text(text_input)
    return run_pipeline_audio(audio)
87
 
88
+
89
+ PROMPT_CHOICES = ["Zero-shot", "Few-shot", "Chain-of-Thought"]
90
+ NASA_SCALE_STR = ["1", "2", "3", "4", "5", "6", "7"]
91
+
92
+ custom_css = """
93
+ @import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@300;400;500;600&display=swap');
94
+ * { font-family: 'DM Sans', sans-serif !important; }
95
+ .gradio-container { background: #f0f4f8 !important; max-width: 1400px !important; margin: 0 auto; }
96
+ .header-banner {
97
+ background: linear-gradient(135deg, #1a5276 0%, #2980b9 100%);
98
+ border-radius: 16px; padding: 32px 40px; margin-bottom: 8px;
99
+ }
100
+ .header-banner h1 { color: white !important; font-size: 2rem !important; font-weight: 600 !important; margin: 0 0 6px 0 !important; }
101
+ .header-banner p { color: rgba(255,255,255,0.85) !important; font-size: 0.9rem !important; margin: 0 !important; }
102
+ .section-card { background: white; border-radius: 14px; padding: 28px; margin-bottom: 16px; border: 1px solid #e8ecf0; }
103
+ .section-label {
104
+ font-size: 0.7rem !important; font-weight: 600 !important;
105
+ letter-spacing: 0.12em !important; text-transform: uppercase !important;
106
+ color: #2980b9 !important; margin-bottom: 16px !important;
107
+ }
108
+ .vips-col-zero { border-top: 3px solid #e74c3c !important; border-radius: 10px; padding: 16px; }
109
+ .vips-col-few { border-top: 3px solid #2980b9 !important; border-radius: 10px; padding: 16px; }
110
+ .vips-col-cot { border-top: 3px solid #27ae60 !important; border-radius: 10px; padding: 16px; }
111
+ .gr-button-primary {
112
+ background: linear-gradient(135deg, #1a5276, #2980b9) !important;
113
+ border: none !important; border-radius: 10px !important; font-weight: 600 !important;
114
+ }
115
+ footer, .footer, .gradio-container > footer,
116
+ a[href*="gradio.app"], a[href*="/?view=api"] {
117
+ display: none !important;
118
+ visibility: hidden !important;
119
+ }
120
+ """
121
+
122
+
123
+ with gr.Blocks(title="VoiceNote AI") as demo:
124
+
125
+ gr.HTML(f"""
126
+ <div class="header-banner">
127
+ <h1>{Config.APP_NAME}</h1>
128
+ <p>VIPS-journalgenerering | Whisper KBLab -> GDPR -> Scaleway</p>
129
+ </div>
130
  """)
131
+
132
+ with gr.Group(elem_classes="section-card"):
133
+ gr.Markdown("##### INMATNING", elem_classes="section-label")
134
+ with gr.Row(equal_height=True):
135
+ audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath",
136
+ label="Ljud", scale=1)
137
+ text_input = gr.Textbox(label="Eller text", lines=5, scale=1,
138
+ placeholder="Klistra in patientsamtalet har...")
139
+ process_btn = gr.Button("Generera journalanteckning",
140
+ variant="primary", size="lg")
141
+
142
+ with gr.Group(elem_classes="section-card"):
143
+ gr.Markdown("##### RESULTAT", elem_classes="section-label")
144
+
145
+ with gr.Accordion("Pipeline-detaljer", open=False):
146
+ with gr.Row():
147
+ transcription_out = gr.Textbox(label="Transkription (SV)",
148
+ lines=5, interactive=True)
149
+ anonymized_out = gr.Textbox(label="Anonymiserad (SV)",
150
+ lines=5, interactive=False)
151
+
152
+ gr.Markdown("##### VIPS - TRE PROMPTSTRATEGIER", elem_classes="section-label")
153
+ with gr.Row():
154
+ with gr.Column(elem_classes="vips-col-zero"):
155
+ gr.HTML("<h4>Zero-shot</h4>")
156
+ zero_out = gr.Textbox(label="", lines=10, interactive=True)
157
+ with gr.Column(elem_classes="vips-col-few"):
158
+ gr.HTML("<h4>Few-shot</h4>")
159
+ few_out = gr.Textbox(label="", lines=10, interactive=True)
160
+ with gr.Column(elem_classes="vips-col-cot"):
161
+ gr.HTML("<h4>Chain-of-Thought</h4>")
162
+ cot_out = gr.Textbox(label="", lines=10, interactive=True)
163
+
164
+ with gr.Group(elem_classes="section-card"):
165
+ gr.Markdown("##### UTVARDERING", elem_classes="section-label")
166
+ gr.Markdown("**Del 1 - Jamforelse av promptstrategier**")
167
+ with gr.Row():
168
+ with gr.Column():
169
+ eval_complete = gr.Radio(choices=PROMPT_CHOICES,
170
+ label="1. Mest fullstandig?")
171
+ eval_hallucination = gr.Radio(choices=PROMPT_CHOICES,
172
+ label="2. Undvek bast att hitta pa information?")
173
+ with gr.Column():
174
+ eval_structure = gr.Radio(choices=PROMPT_CHOICES,
175
+ label="3. Foljde VIPS-strukturen bast?")
176
+ eval_clinical = gr.Radio(choices=PROMPT_CHOICES,
177
+ label="4. Skulle valjas i klinisk praktik?")
178
+ eval_comment = gr.Textbox(label="5. Kommentar", lines=3)
179
+
180
+ gr.Markdown("---\n**Del 2 - NASA-TLX** | *1 = lag, 7 = hog*")
181
+ with gr.Row():
182
+ with gr.Column():
183
+ tlx_mental = gr.Radio(choices=NASA_SCALE_STR, label="Mental")
184
+ tlx_physical = gr.Radio(choices=NASA_SCALE_STR, label="Fysisk")
185
+ tlx_temporal = gr.Radio(choices=NASA_SCALE_STR, label="Tidsbrist")
186
+ with gr.Column():
187
+ tlx_performance = gr.Radio(choices=NASA_SCALE_STR, label="Prestation")
188
+ tlx_effort = gr.Radio(choices=NASA_SCALE_STR, label="Anstrangning")
189
+ tlx_frustration = gr.Radio(choices=NASA_SCALE_STR, label="Frustration")
190
+
191
+ with gr.Row():
192
+ save_btn = gr.Button("Spara utvardering & ladda ner", variant="primary", scale=2)
193
+ clear_btn = gr.Button("Rensa all data fran granssnittet", variant="secondary", scale=1)
194
+
195
+ eval_status = gr.Textbox(label="", interactive=False,
196
+ placeholder="Status visas har efter sparning...")
197
+
198
+ download_file = gr.File(
199
+ label="Komplett resultat + utvardering (JSON) - klicka for att ladda ner",
200
+ interactive=False,
201
+ )
202
+
203
+ # Event handlers
204
+ process_btn.click(
205
+ fn=run_pipeline,
206
+ inputs=[audio_input, text_input],
207
+ outputs=[transcription_out, anonymized_out, zero_out, few_out, cot_out],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  )
209
+
210
def on_save(c, h, s, cl, cm, m, p, t, pe, e, f,
            transcription, zero, few, cot):
    """Combine pipeline results + evaluation into ONE downloadable file.

    Args:
        c, h, s, cl: Answers to the four prompt-comparison questions
            (most complete, least hallucination, best structure, clinical
            choice). At least one must be filled in.
        cm: Free-text comment; stored as "" when empty.
        m, p, t, pe, e, f: NASA-TLX ratings (mental, physical, temporal,
            performance, effort, frustration) as numeric strings, or empty.
        transcription, zero, few, cot: Current contents of the
            transcription box and the three strategy output boxes.

    Returns:
        ``(status_message, file_path)``. ``file_path`` is ``None`` when no
        comparison question was answered; otherwise it is the path of the
        JSON file that was written.
    """
    import tempfile

    if not any([c, h, s, cl]):
        return "Fyll i minst ett svar i Del 1.", None

    filled = [int(x) for x in (m, p, t, pe, e, f) if x]

    # Take the time once so the entry timestamp and the file name agree.
    now = datetime.datetime.now()

    entry = {
        "timestamp": now.isoformat(),
        "system": f"{Config.APP_NAME} v{Config.APP_VERSION}",

        "pipeline_results": {
            "transcription": transcription,
            "vips": {
                "zero_shot": zero,
                "few_shot": few,
                "chain_of_thought": cot,
            },
        },

        "prompt_evaluation": {
            "most_complete": c,
            "least_hallucination": h,
            "best_structure": s,
            "clinical_choice": cl,
            "comment": cm or "",
        },

        "nasa_tlx": {
            "mental": m,
            "physical": p,
            "temporal": t,
            "performance": pe,
            "effort": e,
            "frustration": f,
            "total_avg": round(sum(filled) / len(filled), 2) if filled else None,
        },
    }

    # Server-side persistence is best effort. `save_evaluation` is not
    # defined or imported in this module, so look it up explicitly instead
    # of raising (and swallowing) a NameError on every save.
    server_save = globals().get("save_evaluation")
    if server_save is None:
        logger.warning("Server save skipped: save_evaluation is not defined")
    else:
        try:
            server_save(entry)
        except Exception as ex:
            logger.warning("Server save failed: %s", ex)

    # Unique file per save: a name with only second resolution lets two
    # users saving at the same moment overwrite each other's export.
    stamp = now.strftime("%Y%m%d_%H%M%S")
    with tempfile.NamedTemporaryFile(
        "w",
        encoding="utf-8",
        prefix=f"voicenote_utvardering_{stamp}_",
        suffix=".json",
        delete=False,
    ) as fh:
        json.dump(entry, fh, ensure_ascii=False, indent=2)
        filename = fh.name

    return "Utvardering sparad! Fil klar for nedladdning nedan.", filename
261
+
262
  save_btn.click(
263
+ fn=on_save,
264
+ inputs=[eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment,
265
+ tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration,
266
+ transcription_out, zero_out, few_out, cot_out],
267
+ outputs=[eval_status, download_file],
268
  )
269
+
270
def clear_all():
    """Return the reset value for every component wired to the clear button.

    The 20 values are ordered like the button's ``outputs`` list: the two
    inputs, the five pipeline result boxes, the four comparison radios plus
    the comment box, the six NASA-TLX radios, then the status text and the
    download slot.
    """
    inputs = (None, "")
    pipeline_boxes = ("",) * 5
    comparison = (None,) * 4 + ("",)
    nasa_tlx = (None,) * 6
    status_and_file = ("All data rensad fran granssnittet.", None)
    return inputs + pipeline_boxes + comparison + nasa_tlx + status_and_file
280
+
281
  clear_btn.click(
282
+ fn=clear_all,
283
+ inputs=[],
284
+ outputs=[
285
+ audio_input, text_input,
286
+ transcription_out, anonymized_out, zero_out, few_out, cot_out,
287
+ eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment,
288
+ tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration,
289
+ eval_status, download_file,
290
+ ],
291
  )
 
 
 
 
 
 
 
 
292
 
293
 
294
  if __name__ == "__main__":
295
+ demo.launch(css=custom_css)