Spaces:
Runtime error
Runtime error
Commit
·
eb21f26
1
Parent(s):
a8e533c
Add descriptive output
Browse files
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from transformers import pipeline
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
|
| 4 |
pipe_fine = pipeline(model="zeihers-mart/whisper-small-swedish-basic", device_map="auto")
|
| 5 |
pipe_raw = pipeline(model="openai/whisper-small", device_map="auto")
|
|
@@ -19,24 +20,32 @@ pipe_raw.model.config.forced_decoder_ids = (
|
|
| 19 |
)
|
| 20 |
|
| 21 |
def transcribe(audio):
|
|
|
|
| 22 |
text_sv = pipe_fine(audio)["text"]
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
text_raw= pipe_raw(audio)["text"]
|
| 25 |
-
|
|
|
|
| 26 |
sentiment= sa(text_sv)
|
| 27 |
print(f"Sentiment result: {sentiment}")
|
| 28 |
sentiment= sentiment[0]["label"]
|
| 29 |
path = "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e0/SNice.svg/1200px-SNice.svg.png"
|
| 30 |
if sentiment == "NEGATIVE":
|
| 31 |
-
path = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/42/Sad_smiley_yellow_simple.svg/
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
|
| 34 |
iface = gr.Interface(
|
| 35 |
fn=transcribe,
|
| 36 |
inputs=gr.Audio(sources=["microphone"], type="filepath"),
|
| 37 |
outputs=[gr.Textbox(label="Fine-tuned transcription"),
|
| 38 |
gr.Textbox(label="Whisper transcription"),
|
| 39 |
-
gr.Image(label="Sentiment from Fine-tuned transcription", width=
|
|
|
|
| 40 |
title="Finetuned Whisper Swedish Small",
|
| 41 |
description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
|
| 42 |
)
|
|
|
|
| 1 |
from transformers import pipeline
|
| 2 |
import gradio as gr
|
| 3 |
+
import time
|
| 4 |
|
| 5 |
pipe_fine = pipeline(model="zeihers-mart/whisper-small-swedish-basic", device_map="auto")
|
| 6 |
pipe_raw = pipeline(model="openai/whisper-small", device_map="auto")
|
|
|
|
| 20 |
)
|
| 21 |
|
| 22 |
def transcribe(audio):
    """Transcribe *audio* with both Whisper models and rate its sentiment.

    Args:
        audio: The recording to transcribe; passed unchanged to both
            pipelines (the Gradio input is configured with
            ``type="filepath"``, so this is expected to be a file path).

    Returns:
        A 4-tuple ``(text_sv, text_raw, path, description)``:
        the fine-tuned model's transcription, the original Whisper model's
        transcription, the URL of a smiley image reflecting the sentiment,
        and a human-readable summary of timings and sentiment.
    """
    # Time each model separately so the two can be compared in the summary.
    start = time.time()
    text_sv = pipe_fine(audio)["text"]
    time_fine = time.time() - start
    print(f"Fine-tuned: audio transcribed in {time_fine} seconds: {text_sv}")

    start = time.time()
    text_raw = pipe_raw(audio)["text"]
    time_raw = time.time() - start
    print(f"Raw: audio transcribed in {time_raw} seconds: {text_raw}")

    # NOTE(review): `sa` is defined outside this block; presumably a
    # sentiment-analysis pipeline returning [{"label": ...}, ...] -- confirm.
    sentiment = sa(text_sv)
    print(f"Sentiment result: {sentiment}")
    sentiment = sentiment[0]["label"]

    # Happy smiley by default; swapped for a sad one on a NEGATIVE label.
    path = "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e0/SNice.svg/1200px-SNice.svg.png"
    if sentiment == "NEGATIVE":
        path = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/42/Sad_smiley_yellow_simple.svg/240px-Sad_smiley_yellow_simple.svg.png"

    # Fixed: the original referenced the undefined name `raw_time` and called
    # a non-existent `lower()` builtin, raising NameError on every request.
    description = f"The fine-tuned model took {time_fine} seconds while the original Whisper model took {time_raw} seconds.\nThe sentiment was evaluated from the fine-tuned model transcription as {sentiment.lower()}."
    return text_sv, text_raw, path, description
|
| 41 |
|
| 42 |
# Output components, listed in the same order as the values returned by
# `transcribe`: two transcriptions, the sentiment image, and the summary text.
output_components = [
    gr.Textbox(label="Fine-tuned transcription"),
    gr.Textbox(label="Whisper transcription"),
    gr.Image(label="Sentiment from Fine-tuned transcription", width=250, height=250),
    gr.Textbox(label="Description"),
]

# Microphone-only demo UI; the recording is handed to `transcribe` as a file path.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=output_components,
    title="Finetuned Whisper Swedish Small",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
)
|