# lingobot/interfaces/realtime_asr.py
# Gent (PG/R - Comp Sci & Elec Eng)
# add demos
# a20ccd6
# https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-speech-to-text?tabs=linux%2Cterminal&pivots=programming-language-python
import numpy as np
from utils import recognize_from_stream
from azure.cognitiveservices.speech.audio import PushAudioInputStream, AudioStreamFormat
import gradio as gr
import os
import time
# Push-style audio input that transcribe() writes raw microphone bytes into.
stream = PushAudioInputStream(AudioStreamFormat(48000))  # sample rate is important

# Not referenced anywhere in the visible part of this file.
msg_queue = []

# Conversation log: a list of {'role': ..., 'content': ...} dicts.
# rec_cb() appends 'user' entries; transcribe() reads 'user' and 'assistant' ones.
chat_history = []
def rec_cb(evt):
    """Log a recognition event and record non-empty text as a user message.

    Args:
        evt: Event object exposing the recognised text as ``evt.result.text``.
    """
    print("##########################")
    recognized = evt.result.text
    print(recognized)
    # Empty results carry nothing worth storing in the chat log.
    if not recognized:
        return
    chat_history.append({'role': 'user', 'content': recognized})
# Hand the push stream and the result callback to the project helper.
# NOTE(review): presumably this starts recognition on `stream` and invokes
# `rec_cb` for each result -- confirm against utils.recognize_from_stream.
speech_recognizer = recognize_from_stream(stream, rec_cb)
def transcribe(speech):
    """Push one microphone chunk into the audio stream and return the chat log.

    Args:
        speech: ``(sample_rate, samples)`` tuple where ``samples`` is a NumPy
            array, or ``None`` when the callback fires without any audio.

    Returns:
        A list of ``(user_message, assistant_message)`` tuples built from
        ``chat_history``. When one side has no counterpart yet, its slot is
        ``None`` so the other side is still shown.
    """
    from itertools import zip_longest

    # The callback may be invoked without audio; unpacking None would raise
    # a TypeError, so only touch the stream when a chunk is present.
    if speech is not None:
        sample_rate, samples = speech
        print(time.time(), (sample_rate, len(samples)))
        # NOTE(review): samples are written as-is; sample_rate and channel
        # layout are not checked against the format `stream` was created with.
        stream.write(samples.tobytes())

    user_msg = [m['content'] for m in chat_history if m['role'] == 'user']
    box_msg = [m['content'] for m in chat_history if m['role'] == 'assistant']
    # Plain zip() truncates to the shorter list, which dropped every recognised
    # utterance whenever there were fewer assistant replies than user messages.
    return list(zip_longest(user_msg, box_msg))
# Live Gradio interface: streaming microphone audio in, chat transcript out.
# The title reads "Real-time speech recognition"; the description reads
# "Uses Azure's speech recognition service to recognise microphone input
# in real time."
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="numpy", streaming=True),
    ],
    outputs=[
        gr.Chatbot(),
    ],
    live=True,
    title="实时语音识别",
    description="使用Azure的语音识别服务,实时识别麦克风输入的语音。",
)
# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(show_error=True, share=True)