File size: 1,411 Bytes
57341a2
d9ec907
57341a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9ec907
 
 
 
 
 
 
 
 
 
 
 
57341a2
d9ec907
 
5063b74
d9ec907
5063b74
 
57341a2
d9ec907
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from transformers import pipeline, AutoModel, AutoTokenizer, AutoFeatureExtractor, Wav2Vec2ForCTC, AutoModelForSequenceClassification
import gradio as gr

# --- Speech recognition ------------------------------------------------------
# The Wav2Vec2 CTC model, its tokenizer and its feature extractor are all read
# from the local "./asr" directory and combined into a single pipeline.
feature_extractor = AutoFeatureExtractor.from_pretrained("./asr")
tokenizer = AutoTokenizer.from_pretrained("./asr")
model = Wav2Vec2ForCTC.from_pretrained("./asr")
asr = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer,
    feature_extractor=feature_extractor,
)

# --- Text classification -----------------------------------------------------
# NOTE: `model` and `tokenizer` are rebound here. Rebinding the names does not
# affect the objects that were already handed to the `asr` pipeline above.
tokenizer = AutoTokenizer.from_pretrained("./tc")
model = AutoModelForSequenceClassification.from_pretrained("./tc")
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)

# Alternative setup kept for reference: build the pipelines from a named /
# default checkpoint instead of the local "./asr" and "./tc" directories.
# asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
# classifier = pipeline("text-classification")

def speech_to_text(speech):
    """Transcribe an audio recording to text with the ``asr`` pipeline.

    Args:
        speech: Audio input to transcribe. In this app it comes from the
            ``gr.Audio(type="filepath")`` component, so it is expected to be
            a file path; ``None`` or an empty value means no audio was given.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        there is no audio to transcribe.
    """
    # The button can be clicked before anything has been recorded or uploaded;
    # skip the pipeline call rather than handing it a missing input.
    if not speech:
        return ""
    return asr(speech)["text"]

def text_to_sentiment(text):
    """Classify ``text`` and return the label of the first prediction.

    Args:
        text: Text to classify.

    Returns:
        The ``"label"`` value of the first result returned by ``classifier``,
        or ``None`` when ``text`` is missing or contains only whitespace.
    """
    # This handler is bound to the textbox's change event, so it also runs
    # when the box is empty or cleared; there is nothing to classify then.
    if text is None or not text.strip():
        return None
    return classifier(text)[0]["label"]

# Build the UI: audio input -> transcript textbox -> classification label.
with gr.Blocks() as demo:
    # Audio input; type="filepath" is requested so the handler gets a path.
    audio_file = gr.Audio(type="filepath")
    # Receives the transcript written by speech_to_text.
    text = gr.Textbox()
    # Receives the label returned by text_to_sentiment.
    label = gr.Label()
    b1 = gr.Button("Recognize Speech")

    # Clicking the button transcribes the audio into the textbox.
    b1.click(fn=speech_to_text, inputs=audio_file, outputs=text)
    # Classification is driven by the textbox's change event, so no separate
    # "Classify" button is needed.
    text.change(fn=text_to_sentiment, inputs=text, outputs=label)

demo.launch()