Spaces:
No application file
No application file
| # https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-speech-to-text?tabs=linux%2Cterminal&pivots=programming-language-python | |
| import numpy as np | |
| from utils import recognize_from_stream | |
| from azure.cognitiveservices.speech.audio import PushAudioInputStream, AudioStreamFormat | |
| import gradio as gr | |
| import os | |
| import time | |
# Module-level state shared by the recognition callback and the Gradio handler.
# The push stream is declared with a 48000 Hz format; presumably this has to
# match the rate of the audio written into it -- confirm against the mic input.
stream = PushAudioInputStream(AudioStreamFormat(48000))  # sample rate is important

# Not referenced by the code visible in this file.
msg_queue = []

# Chat entries of the form {'role': ..., 'content': ...}.
chat_history = []
def rec_cb(evt):
    """Log a recognition event and store its text as a user chat entry.

    Args:
        evt: Event object exposing ``evt.result.text``.

    Empty text is printed but not appended to ``chat_history``.
    """
    print("##########################")
    recognized = evt.result.text
    print(recognized)
    if not recognized:
        return
    chat_history.append({'role': 'user', 'content': recognized})
# Hand the push stream and the result callback to the helper from utils.
# NOTE(review): presumably this also starts recognition -- confirm in utils.
speech_recognizer = recognize_from_stream(
    stream,
    rec_cb,
)
def transcribe(speech):
    """Push one microphone chunk into the audio stream and return the chat so far.

    Args:
        speech: ``(sample_rate, samples)`` pair where ``samples`` supports
            ``len()`` and ``.tobytes()`` (a NumPy array when the audio input
            uses ``type="numpy"``). ``None`` is tolerated: nothing is written
            and the current history is still returned.

    Returns:
        A list of ``(user_message, assistant_message)`` tuples built from
        ``chat_history``. Messages are paired by position; when one side has
        fewer entries, the missing slot is ``None``.
    """
    if speech is not None:
        sample_rate, samples = speech
        print(time.time(), (sample_rate, len(samples)))
        # The raw sample bytes are forwarded unchanged.
        # NOTE(review): assumes the chunk's rate/dtype match the format the
        # stream was created with -- confirm.
        stream.write(samples.tobytes())

    user_msgs = [m['content'] for m in chat_history if m['role'] == 'user']
    bot_msgs = [m['content'] for m in chat_history if m['role'] == 'assistant']

    # zip() stops at the shorter list, which silently dropped every user
    # message that had no assistant counterpart. Pad both sides to the same
    # length so unanswered messages are still returned.
    pair_count = max(len(user_msgs), len(bot_msgs))
    user_msgs.extend([None] * (pair_count - len(user_msgs)))
    bot_msgs.extend([None] * (pair_count - len(bot_msgs)))
    return list(zip(user_msgs, bot_msgs))
# Gradio UI: a streaming microphone input wired to transcribe(), with the
# result rendered in a Chatbot component. live=True is passed so the
# interface runs without a submit click.
demo = gr.Interface(
    fn=transcribe,
    inputs=[gr.Audio(source="microphone", type="numpy", streaming=True)],
    outputs=[gr.Chatbot()],
    live=True,
    title="实时语音识别",
    description="使用Azure的语音识别服务,实时识别麦克风输入的语音。",
)

if __name__ == "__main__":
    # Launch only when executed as a script.
    demo.launch(share=True, show_error=True)