# lingobot / utils / asr.py
# gent
# realtime asr
# f96a97b
import azure.cognitiveservices.speech as speechsdk
from azure.cognitiveservices.speech.audio import PushAudioInputStream
import os
import time
# Shared recognizer configuration. The subscription key and service region are
# read from the SPEECH_KEY and SPEECH_REGION environment variables; each falls
# back to an empty string when the variable is not set.
speech_config = speechsdk.SpeechConfig(
    subscription=os.getenv('SPEECH_KEY', ''),
    region=os.getenv('SPEECH_REGION', ''),
)
# Recognition language used by every recognizer built from this config.
speech_config.speech_recognition_language = "en-US"
def recognize_from_file(file=None):
    """Run a single recognition pass over an audio file and return its text.

    Uses the module-level ``speech_config``, which is built from the
    ``SPEECH_KEY`` and ``SPEECH_REGION`` environment variables.

    Args:
        file: Value handed to ``AudioConfig`` as its ``filename`` argument.

    Returns:
        The ``text`` attribute of the result produced by one
        ``recognize_once_async`` call.
    """
    source = speechsdk.audio.AudioConfig(filename=file)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=source,
    )
    # Wait on the future so the caller receives the finished result.
    pending = recognizer.recognize_once_async()
    outcome = pending.get()
    return outcome.text
def recognize_from_stream(stream, rec_cb):
    """Start continuous recognition on an audio stream and return the recognizer.

    Progress events (recognizing, session started, session stopped, canceled)
    are printed to stdout. When the session stops or is canceled, continuous
    recognition is additionally stopped from inside the event handler.

    Args:
        stream: Audio input stream handed to ``AudioConfig`` as ``stream``.
        rec_cb: Callable connected to the recognizer's ``recognized`` signal;
            it is invoked with the event object for that signal.

    Returns:
        The ``SpeechRecognizer``. Recognition is started with
        ``start_continuous_recognition_async`` and the returned future is not
        awaited, so this function returns without waiting for startup.
    """
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=speechsdk.audio.AudioConfig(stream=stream),
    )

    def _announce(template):
        """Build an event handler that prints *template* filled with the event."""
        def _handler(evt):
            print(template.format(evt))
        return _handler

    def _shutdown(evt):
        """Report the closing event and stop continuous recognition."""
        print('CLOSING on {}'.format(evt))
        recognizer.stop_continuous_recognition()

    recognizer.recognizing.connect(_announce('RECOGNIZING: {}'))
    recognizer.recognized.connect(rec_cb)
    recognizer.session_started.connect(_announce('SESSION STARTED: {}'))

    # For both terminal signals the printing handler is connected before the
    # stopping handler, so each signal keeps its original handler order.
    terminal_signals = (
        (recognizer.session_stopped, 'SESSION STOPPED {}'),
        (recognizer.canceled, 'CANCELED {}'),
    )
    for signal, template in terminal_signals:
        signal.connect(_announce(template))
        signal.connect(_shutdown)

    recognizer.start_continuous_recognition_async()
    return recognizer
if __name__ == '__main__':
    ## Recognize from file
    # for audio_file in os.listdir("audio_samples"):
    #     start = time.time()
    #     print(recognize_from_file(f"audio_samples/{audio_file}"), " in ", time.time()-start)

    ## Real-time recognition
    def rec_cb(evt):
        """Print a separator line followed by the text of the event's result."""
        print("##########################")
        print(evt.result.text)

    stream = PushAudioInputStream()
    # Read through a context manager so the file handle is closed promptly
    # instead of being left to the garbage collector.
    # NOTE(review): the file's bytes are pushed unmodified, including any WAV
    # header — confirm this matches the stream's expected audio format.
    with open("audio_samples/audo_0.wav", "rb") as audio_file:
        stream.write(audio_file.read())

    # Hold a reference for the duration of the wait so the recognizer's
    # lifetime does not depend on garbage-collection timing.
    recognizer = recognize_from_stream(stream, rec_cb)

    # No more audio will be pushed; closing the stream signals end of input
    # so the recognizer is not left waiting for data that never arrives.
    stream.close()

    # Give the background recognition time to deliver its results.
    time.sleep(10)