Spaces:
Sleeping
Sleeping
updated audio model type
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import time
|
| 3 |
from transformers import pipeline
|
| 4 |
-
from datasets import load_dataset, Audio
|
| 5 |
|
| 6 |
st.set_page_config(page_title="🤗 Transformers Library examples",layout="wide")
|
| 7 |
|
|
@@ -156,19 +156,6 @@ def audio_classification():
|
|
| 156 |
|
| 157 |
# function for Automatic Speech Recognition model
|
| 158 |
def automatic_speech_recognition():
|
| 159 |
-
minds = load_dataset("PolyAI/minds14", name="en-US", split="train")
|
| 160 |
-
minds = minds.train_test_split(test_size=0.2)
|
| 161 |
-
st.write(minds)
|
| 162 |
-
minds = minds.remove_columns(["path", "transcription", "english_transcription", "lang_id"])
|
| 163 |
-
st.write("minds[train][0] " , minds["train"][0])
|
| 164 |
-
labels = minds["train"].features["intent_class"].names
|
| 165 |
-
st.write("labels " ,labels)
|
| 166 |
-
label2id, id2label = dict(), dict()
|
| 167 |
-
for i, label in enumerate(labels):
|
| 168 |
-
label2id[label] = str(i)
|
| 169 |
-
id2label[str(i)] = label
|
| 170 |
-
st.write("label2id - id2label" , label2id , id2label)
|
| 171 |
-
|
| 172 |
code = '''
|
| 173 |
from transformers import pipeline
|
| 174 |
|
|
@@ -177,12 +164,14 @@ def automatic_speech_recognition():
|
|
| 177 |
'''
|
| 178 |
st.code(code, language='python')
|
| 179 |
if st.button("Run Test ", type="primary"):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
with st.spinner('Wait for it...'):
|
| 181 |
time.sleep(5)
|
| 182 |
-
transcriber = pipeline(task="automatic-speech-recognition")
|
| 183 |
-
results = transcriber("./data/audio.m4a")
|
| 184 |
st.write("Output:")
|
| 185 |
-
st.success(
|
| 186 |
|
| 187 |
|
| 188 |
# function for Image Captioning model
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import time
|
| 3 |
from transformers import pipeline
|
| 4 |
+
from datasets import load_dataset, Audio, Features
|
| 5 |
|
| 6 |
st.set_page_config(page_title="🤗 Transformers Library examples",layout="wide")
|
| 7 |
|
|
|
|
| 156 |
|
| 157 |
# function for Automatic Speech Recognition model
|
| 158 |
def automatic_speech_recognition():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
code = '''
|
| 160 |
from transformers import pipeline
|
| 161 |
|
|
|
|
| 164 |
'''
|
| 165 |
st.code(code, language='python')
|
| 166 |
if st.button("Run Test ", type="primary"):
|
| 167 |
+
speech_recognizer = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
|
| 168 |
+
dataset = load_dataset("PolyAI/minds14", name="en-US", split="train")
|
| 169 |
+
dataset = dataset.cast_column("audio", Audio(sampling_rate=speech_recognizer.feature_extractor.sampling_rate))
|
| 170 |
+
result = speech_recognizer(dataset[:4]["audio"])
|
| 171 |
with st.spinner('Wait for it...'):
|
| 172 |
time.sleep(5)
|
|
|
|
|
|
|
| 173 |
st.write("Output:")
|
| 174 |
+
st.success([d["text"] for d in result])
|
| 175 |
|
| 176 |
|
| 177 |
# function for Image Captioning model
|