x
File size: 11,128 Bytes
0586e99
68e523b
0586e99
 
68e523b
0586e99
 
 
68e523b
3f7070d
68e523b
 
0586e99
54eb47d
0586e99
 
68e523b
3f7070d
 
 
63d07e4
3f7070d
68e523b
63d07e4
3f7070d
68e523b
 
0586e99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68e523b
 
 
 
0586e99
 
68e523b
 
0586e99
68e523b
0586e99
 
 
 
 
 
 
 
68e523b
0586e99
 
 
 
 
 
 
68e523b
0586e99
 
 
68e523b
0586e99
 
68e523b
 
0586e99
 
 
 
68e523b
0586e99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68e523b
0586e99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f7070d
0586e99
 
 
 
 
 
 
3f7070d
fdbbe9a
3f7070d
 
0586e99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54eb47d
0586e99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c60b82
0586e99
 
 
 
 
1c60b82
0586e99
 
 
 
 
 
 
 
 
 
 
1c60b82
0586e99
 
 
 
 
 
 
 
 
1c60b82
 
 
ec5faa7
0586e99
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
import json
import logging
import datetime
import spaces
import gradio as gr

from config import Config, VIPS_CATEGORIES
from gdpr_filter import apply_gdpr_filter
from models import WhisperASR, MistralClient
from vips_classifier import classify_all, format_vips_for_display


# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# The ASR wrapper is instantiated once, at import time, and reused by
# run_pipeline_audio() for every request.
asr_model      = WhisperASR()
# The LLM client starts out unset; _get_clients() creates it on first use.
mistral_client = None

def format_vips_output(vips_dict) -> str:
    """Turn one strategy's classification result into display text.

    Dict results are handed to ``format_vips_for_display``. Any other value is
    converted with ``str`` and stripped; falsy values and values that are
    blank after stripping produce the placeholder ``"No output generated"``.
    """
    if isinstance(vips_dict, dict):
        return format_vips_for_display(vips_dict)

    # Falsy inputs (None, "", 0, ...) never get stringified; they fall straight
    # through to the placeholder, as do whitespace-only strings.
    text = str(vips_dict).strip() if vips_dict else ""
    return text or "No output generated"


def _get_clients():
    """Return the shared ``MistralClient``, creating it on the first call.

    The instance is stored in the module-level ``mistral_client`` so later
    calls reuse it. Any exception raised by ``MistralClient()`` propagates to
    the caller and leaves the global unset.
    """
    global mistral_client
    if mistral_client is not None:
        return mistral_client
    mistral_client = MistralClient()
    return mistral_client


@spaces.GPU
def run_pipeline_audio(audio):
    """Transcribe an audio input and run the text through the shared pipeline.

    Args:
        audio: Value passed straight to ``asr_model.transcribe``; the UI's
            audio component is configured with ``type="filepath"``.

    Returns:
        A 5-tuple ``(transcription, anonymized, zero_shot, few_shot,
        chain_of_thought)``, one value per output component wired to the
        process button. When transcription fails or yields no text, the
        message is placed in the first slot and the remaining four are empty.
    """
    try:
        swedish_text = asr_model.transcribe(audio)
        if not swedish_text or not swedish_text.strip():
            # Five values, matching the success path of _run_common().
            return ("Transkriptionen ar tom.", "", "", "", "")
    except Exception as e:
        # UI boundary: log the traceback and show the error instead of raising.
        logger.exception("ASR failed")
        return (f"[FEL ASR]: {e}", "", "", "", "")
    return _run_common(swedish_text)


def run_pipeline_text(text_input):
    """Run the shared pipeline on text typed or pasted by the user.

    Args:
        text_input: Raw textbox content; may be ``None`` or blank.

    Returns:
        A 5-tuple ``(transcription, anonymized, zero_shot, few_shot,
        chain_of_thought)``, one value per output component wired to the
        process button. For missing or whitespace-only input the first slot
        holds a notice and the remaining four are empty strings.
    """
    if not text_input or not text_input.strip():
        # Five values, matching the success path of _run_common().
        return ("Ingen text angiven.", "", "", "", "")
    return _run_common(text_input.strip())


def _run_common(swedish_text):
    """Anonymize the text and classify it with all three prompt strategies.

    Args:
        swedish_text: Transcribed or pasted text to process.

    Returns:
        A 5-tuple ``(text, anonymized, zero_shot, few_shot,
        chain_of_thought)``, one value per output component wired to the
        process button. Every path returns exactly five values:

        * client initialisation failure: the error goes in the zero-shot slot
          and the other two strategy slots are empty;
        * LLM failure: the error is repeated in all three strategy slots;
        * success: each strategy slot holds its formatted result.
    """
    logger.info("Running GDPR filter...")
    anonymized_sv = apply_gdpr_filter(swedish_text)

    try:
        mc = _get_clients()
    except Exception as e:
        # UI boundary: report the failure in the output instead of raising.
        logger.exception("Client init failed")
        return (swedish_text, anonymized_sv, f"[FEL]: {e}", "", "")

    logger.info("Running Scaleway LLM...")
    try:
        # Only the anonymized text is sent to the LLM client.
        all_results = classify_all(anonymized_sv, mc)
        logger.info("Scaleway classification complete")
    except Exception as e:
        logger.exception("LLM failed")
        err = f"[FEL LLM]: {e}"
        return (swedish_text, anonymized_sv, err, err, err)

    zero_text = format_vips_output(all_results["zero_shot"])
    few_text  = format_vips_output(all_results["few_shot"])
    cot_text  = format_vips_output(all_results["chain_of_thought"])

    logger.info("Returning results to UI")
    return (swedish_text, anonymized_sv, zero_text, few_text, cot_text)


def run_pipeline(audio, text_input):
    """Dispatch to the audio or the text pipeline.

    Audio wins whenever it is not ``None``; the text box is only consulted
    when no audio was supplied.
    """
    no_audio = audio is None
    return run_pipeline_text(text_input) if no_audio else run_pipeline_audio(audio)


# Answer options for the four prompt-strategy comparison questions.
PROMPT_CHOICES = ["Zero-shot", "Few-shot", "Chain-of-Thought"]
# Rating options for the NASA-TLX radios; kept as strings and converted with
# int() when the average is computed in on_save().
NASA_SCALE_STR = ["1", "2", "3", "4", "5", "6", "7"]

# Stylesheet passed to demo.launch(): fonts, header banner, section cards,
# one coloured top border per strategy column, and rules that hide the footer
# and API links.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@300;400;500;600&display=swap');
* { font-family: 'DM Sans', sans-serif !important; }
.gradio-container { background: #f0f4f8 !important; max-width: 1400px !important; margin: 0 auto; }
.header-banner {
    background: linear-gradient(135deg, #1a5276 0%, #2980b9 100%);
    border-radius: 16px; padding: 32px 40px; margin-bottom: 8px;
}
.header-banner h1 { color: white !important; font-size: 2rem !important; font-weight: 600 !important; margin: 0 0 6px 0 !important; }
.header-banner p  { color: rgba(255,255,255,0.85) !important; font-size: 0.9rem !important; margin: 0 !important; }
.section-card { background: white; border-radius: 14px; padding: 28px; margin-bottom: 16px; border: 1px solid #e8ecf0; }
.section-label {
    font-size: 0.7rem !important; font-weight: 600 !important;
    letter-spacing: 0.12em !important; text-transform: uppercase !important;
    color: #2980b9 !important; margin-bottom: 16px !important;
}
.vips-col-zero { border-top: 3px solid #e74c3c !important; border-radius: 10px; padding: 16px; }
.vips-col-few  { border-top: 3px solid #2980b9 !important; border-radius: 10px; padding: 16px; }
.vips-col-cot  { border-top: 3px solid #27ae60 !important; border-radius: 10px; padding: 16px; }
.gr-button-primary {
    background: linear-gradient(135deg, #1a5276, #2980b9) !important;
    border: none !important; border-radius: 10px !important; font-weight: 600 !important;
}
footer, .footer, .gradio-container > footer,
a[href*="gradio.app"], a[href*="/?view=api"] {
    display: none !important;
    visibility: hidden !important;
}
"""


# Build the whole UI inside one Blocks context; `demo` is launched at the
# bottom of the file.
with gr.Blocks(title="VoiceNote AI") as demo:

    # Header banner showing the configured application name.
    gr.HTML(f"""
    <div class="header-banner">
      <h1>{Config.APP_NAME}</h1>
      <p>VIPS-journalgenerering | Whisper KBLab -> GDPR -> Scaleway</p>
    </div>
    """)

    # --- Input section: audio (microphone or upload) or pasted text. ---
    with gr.Group(elem_classes="section-card"):
        gr.Markdown("##### INMATNING", elem_classes="section-label")
        with gr.Row(equal_height=True):
            # type="filepath": the handler receives a path, not raw samples.
            audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath",
                                   label="Ljud", scale=1)
            text_input = gr.Textbox(label="Eller text", lines=5, scale=1,
                                    placeholder="Klistra in patientsamtalet har...")
        process_btn = gr.Button("Generera journalanteckning",
                                variant="primary", size="lg")

    # --- Result section: pipeline details plus one box per strategy. ---
    with gr.Group(elem_classes="section-card"):
        gr.Markdown("##### RESULTAT", elem_classes="section-label")

        # Collapsed by default; the transcription is editable, the anonymized
        # text is read-only.
        with gr.Accordion("Pipeline-detaljer", open=False):
            with gr.Row():
                transcription_out = gr.Textbox(label="Transkription (SV)",
                                               lines=5, interactive=True)
                anonymized_out = gr.Textbox(label="Anonymiserad (SV)",
                                            lines=5, interactive=False)

        gr.Markdown("##### VIPS - TRE PROMPTSTRATEGIER", elem_classes="section-label")
        
        # Zero-shot and few-shot share a row.
        with gr.Row():
            with gr.Column(elem_classes="vips-col-zero"):
                gr.HTML("<h4>Zero-shot</h4>")
                zero_out = gr.Textbox(label="", lines=10, interactive=True)
            with gr.Column(elem_classes="vips-col-few"):
                gr.HTML("<h4>Few-shot</h4>")
                few_out = gr.Textbox(label="", lines=10, interactive=True)
        
        # Chain-of-Thought is placed outside that row, in its own column
        # directly under the group.
        with gr.Column(elem_classes="vips-col-cot"):
            gr.HTML("<h4>Chain-of-Thought</h4>")
            cot_out = gr.Textbox(label="", lines=10, interactive=True)

    # --- Evaluation section: strategy comparison and NASA-TLX ratings. ---
    with gr.Group(elem_classes="section-card"):
        gr.Markdown("##### UTVARDERING", elem_classes="section-label")
        gr.Markdown("**Del 1 - Jamforelse av promptstrategier**")
        with gr.Row():
            with gr.Column():
                eval_complete = gr.Radio(choices=PROMPT_CHOICES,
                    label="1. Mest fullstandig?")
                eval_hallucination = gr.Radio(choices=PROMPT_CHOICES,
                    label="2. Undvek bast att hitta pa information?")
            with gr.Column():
                eval_structure = gr.Radio(choices=PROMPT_CHOICES,
                    label="3. Foljde VIPS-strukturen bast?")
                eval_clinical = gr.Radio(choices=PROMPT_CHOICES,
                    label="4. Skulle valjas i klinisk praktik?")
        eval_comment = gr.Textbox(label="5. Kommentar", lines=3)

        # Six NASA-TLX dimensions, each rated on the 1-7 string scale.
        gr.Markdown("---\n**Del 2 - NASA-TLX** | *1 = lag, 7 = hog*")
        with gr.Row():
            with gr.Column():
                tlx_mental = gr.Radio(choices=NASA_SCALE_STR, label="Mental")
                tlx_physical = gr.Radio(choices=NASA_SCALE_STR, label="Fysisk")
                tlx_temporal = gr.Radio(choices=NASA_SCALE_STR, label="Tidsbrist")
            with gr.Column():
                tlx_performance = gr.Radio(choices=NASA_SCALE_STR, label="Prestation")
                tlx_effort = gr.Radio(choices=NASA_SCALE_STR, label="Anstrangning")
                tlx_frustration = gr.Radio(choices=NASA_SCALE_STR, label="Frustration")

        with gr.Row():
            save_btn = gr.Button("Spara utvardering & ladda ner", variant="primary", scale=2)
            clear_btn = gr.Button("Rensa all data fran granssnittet", variant="secondary", scale=1)

        # Status line and download slot, both filled in by on_save().
        eval_status = gr.Textbox(label="", interactive=False,
                                 placeholder="Status visas har efter sparning...")

        download_file = gr.File(
            label="Komplett resultat + utvardering (JSON) - klicka for att ladda ner",
            interactive=False,
        )

    # The pipeline handler feeds exactly these five output components, in
    # this order.
    process_btn.click(
        fn=run_pipeline,
        inputs=[audio_input, text_input],
        outputs=[transcription_out, anonymized_out, zero_out, few_out, cot_out],
    )

    def on_save(c, h, s, cl, cm, m, p, t, pe, e, f,
                transcription, zero, few, cot):
        """Write the current results and evaluation answers to a JSON file.

        Args:
            c, h, s, cl: Answers to comparison questions 1-4, stored as
                ``most_complete``, ``least_hallucination``, ``best_structure``
                and ``clinical_choice``.
            cm: Free-text comment; stored as ``""`` when empty.
            m, p, t, pe, e, f: NASA-TLX ratings as numeric strings, or a falsy
                value when unanswered.
            transcription, zero, few, cot: Current contents of the
                transcription box and the three strategy boxes.

        Returns:
            ``(status_message, file_path)``. ``file_path`` is ``None`` when
            none of the four comparison questions has been answered.
        """
        # Local imports keep this handler self-contained.
        import os
        import tempfile

        if not any([c, h, s, cl]):
            return "Fyll i minst ett svar i Del 1.", None

        tlx = {
            "mental":       m,
            "physical":     p,
            "temporal":     t,
            "performance":  pe,
            "effort":       e,
            "frustration":  f,
        }
        # Only answered dimensions count towards the average.
        filled = [int(x) for x in tlx.values() if x]
        tlx["total_avg"] = round(sum(filled) / len(filled), 2) if filled else None

        # One timestamp for both the record and the file name, so they agree.
        now = datetime.datetime.now()

        entry = {
            "timestamp": now.isoformat(),
            "system": f"{Config.APP_NAME} v{Config.APP_VERSION}",

            "pipeline_results": {
                "transcription": transcription,
                "vips": {
                    "zero_shot":        zero,
                    "few_shot":         few,
                    "chain_of_thought": cot,
                },
            },

            "prompt_evaluation": {
                "most_complete":       c,
                "least_hallucination": h,
                "best_structure":      s,
                "clinical_choice":     cl,
                "comment":             cm or "",
            },

            "nasa_tlx": tlx,
        }

        # mkstemp adds a unique suffix in the system temp directory, so two
        # sessions saving within the same second cannot overwrite each
        # other's file.
        fd, filename = tempfile.mkstemp(
            prefix=f"voicenote_utvardering_{now:%Y%m%d_%H%M%S}_",
            suffix=".json",
        )
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            json.dump(entry, fh, ensure_ascii=False, indent=2)

        return "Utvardering sparad! Fil klar for nedladdning nedan.", filename

    # Input order must match on_save()'s positional parameters: the four
    # comparison answers and the comment, the six NASA-TLX ratings, then the
    # transcription and the three strategy texts.
    save_btn.click(
        fn=on_save,
        inputs=[eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment,
                tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration,
                transcription_out, zero_out, few_out, cot_out],
        outputs=[eval_status, download_file],
    )

    def clear_all():
        """Return reset values for every component wired to the clear button.

        The 20 values follow the order of the clear button's ``outputs`` list:
        two inputs, five pipeline text boxes, four comparison radios plus the
        comment, six NASA-TLX radios, the status message and the download slot.
        """
        input_reset = (None, "")
        pipeline_reset = ("",) * 5
        comparison_reset = (None,) * 4 + ("",)
        tlx_reset = (None,) * 6
        footer_reset = ("All data rensad fran granssnittet.", None)
        return (
            input_reset
            + pipeline_reset
            + comparison_reset
            + tlx_reset
            + footer_reset
        )

    # The output order must match the tuple returned by clear_all(), which
    # supplies one reset value per component listed here.
    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[
            audio_input, text_input,
            transcription_out, anonymized_out, zero_out, few_out, cot_out,
            eval_complete, eval_hallucination, eval_structure, eval_clinical, eval_comment,
            tlx_mental, tlx_physical, tlx_temporal, tlx_performance, tlx_effort, tlx_frustration,
            eval_status, download_file,
        ],
    )


# Start the app only when the file is run as a script; the custom stylesheet
# is supplied at launch.
if __name__ == "__main__":
    demo.launch(css=custom_css)